diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java index e3da7f0..9db1ecb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java @@ -35,7 +35,7 @@ private static final long serialVersionUID = 1L; - private String internalName; + private String internalName; // The name from the table column for OPs like TS or auto-generated for internal OPs private String alias = null; // [optional] alias of the column (external name // as seen by the users) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java index 1acb3b3..e86f3ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java @@ -61,12 +61,21 @@ public ColumnInfo getColumnInfo(String internalName) { return null; } + /** + * Given table alias and column alias, match them in the current RowSchema. + * InternalName will be used to match since InternalName will be visible in + * current scope (to the parent OPs) while alias will be visible outside the + * current scope (to the children OPs). 
+ * @param tabAlias table alias to match; null matches only columns that have no table alias + * @param alias column name to look up, compared against each column's internal name + * @return the first ColumnInfo in the signature that matches + */ public ColumnInfo getColumnInfo(String tabAlias, String alias) { for (ColumnInfo columnInfo: this.signature) { if (columnInfo.getTabAlias() == null) { if (tabAlias == null) { - if(columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { + if(columnInfo.getInternalName() != null && alias != null && + columnInfo.getInternalName().equals(alias)) { return columnInfo; } } @@ -74,8 +83,8 @@ public ColumnInfo getColumnInfo(String tabAlias, String alias) { else { if (tabAlias != null) { if (columnInfo.getTabAlias().equals(tabAlias) && - columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { + columnInfo.getInternalName() != null && alias != null && + columnInfo.getInternalName().equals(alias)) { return columnInfo; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 1814550..e10dd9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -73,15 +73,15 @@ public ConstantPropagateProcCtx(ConstantPropagateOption option) { } /** - * Resolve a ColumnInfo based on given RowResolver. + * Resolve a given ColumnInfo to the ColumnInfo in the given RowSchema. 
* * @param ci - * @param rr - * @param parentRR + * @param rs + * @param parentRS * @return * @throws SemanticException */ - private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { + private static ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { // Resolve new ColumnInfo from String alias = ci.getAlias(); if (alias == null) { diff --git a/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q new file mode 100644 index 0000000..4e49998 --- /dev/null +++ b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q @@ -0,0 +1,14 @@ +set hive.cbo.enable=false; +set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string); +create table t2(p1 double, c2 string); +insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0); +insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0); +explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; + +select * from t2; + diff --git a/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out new file mode 100644 index 0000000..aa4ac98 --- /dev/null +++ b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2(p1 double, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(p1 double, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +POSTHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 
0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c1 = 'c1') (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(if((c2 = 0.0), c2, '3.0')) (type: double), '40' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@p1=40/p2=p2 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@p1=40/p2=p2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c2 SIMPLE [(t1)t1.FieldSchema(name:p1, type:string, comment:null), ] +POSTHOOK: Lineage: t2.p1 EXPRESSION [(t1)t1.FieldSchema(name:c2, type:double, comment:null), ] +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 40