diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 0a61f12..5f53f96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -82,7 +82,7 @@ public ConstantPropagateProcCtx(ConstantPropagateOption option) { * @throws SemanticException */ private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { - // Resolve new ColumnInfo from + // Resolve new ColumnInfo from String alias = ci.getAlias(); if (alias == null) { alias = ci.getInternalName(); @@ -152,7 +152,6 @@ private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { if (e.getValue().isSame(f.getValue())) { found = true; } - break; } } if (!found) { @@ -172,12 +171,30 @@ private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { Map c = opToConstantExprs.get(parent); for (Entry e : c.entrySet()) { ColumnInfo ci = e.getKey(); - ColumnInfo rci = null; ExprNodeDesc constant = e.getValue(); - rci = resolve(ci, rs, parent.getSchema()); - if (rci != null) { - constants.put(rci, constant); - } else { + boolean resolved = false; + + if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) { + for (Entry entry : op.getColumnExprMap().entrySet()) { + if (entry.getValue().isSame(constant)) { + ColumnInfo rsColumnInfo = rs.getColumnInfo(entry.getKey()); + if (rsColumnInfo == null) { + continue; + } + constants.put(rsColumnInfo, constant); + resolved = true; + } + } + } + + if (!resolved) { + ColumnInfo rci = resolve(ci, rs, parent.getSchema()); + if (rci != null) { + constants.put(rci, constant); + resolved = true; + } + } + if (!resolved) { LOG.debug("Can't resolve " + ci.getTabAlias() + "." + ci.getAlias() + "(" + ci.getInternalName() + ") from rs:" + rs); } diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out b/ql/src/test/results/clientpositive/bucket_groupby.q.out index 0e30801..676326a 100644 --- a/ql/src/test/results/clientpositive/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -398,16 +398,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col0 (type: string), 3 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Map-reduce partition columns: _col0 (type: string), 3 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int) + keys: KEY._col0 (type: string), 3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -935,15 +935,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) + key expressions: _col0 (type: string), 3 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Map-reduce partition columns: _col0 (type: string), 3 (type: int) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int) + keys: KEY._col0 (type: string), 3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/cluster.q.out b/ql/src/test/results/clientpositive/cluster.q.out index 2da1f91..5988742 100644 --- a/ql/src/test/results/clientpositive/cluster.q.out +++ b/ql/src/test/results/clientpositive/cluster.q.out @@ -524,9 +524,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: '20' (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), '20' (type: string) + expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -620,10 +621,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + value expressions: '20' (type: string), _col3 (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), '20' (type: string), VALUE._col2 (type: string) + expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -717,10 +718,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: '20' (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), '20' (type: string), _col3 (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), VALUE._col0 (type: string), '20' (type: string), VALUE._col2 (type: string) + expressions: '20' (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -809,10 +810,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: '20' (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string), '20' (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), VALUE._col0 (type: string), '20' (type: string) + expressions: '20' (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out index 141bcd8..9edd94b 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out @@ -1547,15 +1547,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: 'day' (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: 'day' (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: 'day' (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -1642,9 +1642,8 @@ group by "day", key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1664,15 +1663,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) + key expressions: 'day' (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: 'day' (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: 'day' (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -1682,34 +1681,12 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.hive13_dp1 + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.hive13_dp1 Stage: Stage-0 Move Operator @@ -1723,7 +1700,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index ceecbb9..9c45602 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -1873,9 +1873,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), 1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), 1 (type: int), _col2 (type: string) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col3 (type: bigint) @@ -1937,7 +1937,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), 1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 009ab2e..ad263bc 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -1939,7 +1939,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), 1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2003,7 +2003,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), 1 (type: int), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2031,9 +2031,9 @@ STAGE PLANS: TableScan GatherStats: false Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + key expressions: _col0 (type: string), 1 (type: int), _col2 (type: string) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns: _col0 (type: string), 1 (type: int), _col2 (type: string) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col3 (type: bigint) @@ -2067,7 +2067,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + keys: KEY._col0 (type: string), 1 (type: int), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/ppd2.q.out b/ql/src/test/results/clientpositive/ppd2.q.out index d583b17..c79482b 100644 --- a/ql/src/test/results/clientpositive/ppd2.q.out +++ b/ql/src/test/results/clientpositive/ppd2.q.out @@ -386,9 +386,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: '20' (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), '20' (type: string) + expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_clusterby.q.out b/ql/src/test/results/clientpositive/ppd_clusterby.q.out index b21835f..6b400ca 100644 --- a/ql/src/test/results/clientpositive/ppd_clusterby.q.out +++ b/ql/src/test/results/clientpositive/ppd_clusterby.q.out @@ -119,9 +119,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: '20' (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), '20' (type: string) + expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -268,9 +269,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: '20' (type: string) Reduce Operator Tree: Select Operator - expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), '20' (type: string) + expressions: '20' (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vector_null_projection.q.out b/ql/src/test/results/clientpositive/vector_null_projection.q.out index c787fd4..941b8e1 100644 --- a/ql/src/test/results/clientpositive/vector_null_projection.q.out +++ b/ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -111,9 +111,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: void) + key expressions: null (type: void) sort order: + - Map-reduce partition columns: _col0 (type: void) + Map-reduce partition columns: null (type: void) Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE TableScan alias: b @@ -130,23 +130,27 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: void) + key expressions: null (type: void) sort order: + - Map-reduce partition columns: _col0 (type: void) + Map-reduce partition columns: null (type: void) Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: void) + keys: null (type: void) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator