diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java index e3da7f0..52146dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java @@ -20,6 +20,7 @@ import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -60,6 +61,11 @@ private String typeName; + /** + * The column expression from the original query + */ + private ExprNodeDesc exprColumn; + public ColumnInfo() { } @@ -107,6 +113,11 @@ public ColumnInfo(ColumnInfo columnInfo) { this.isVirtualCol = columnInfo.getIsVirtualCol(); this.isHiddenVirtualCol = columnInfo.isHiddenVirtualCol(); this.setType(columnInfo.getType()); + this.exprColumn = columnInfo.exprColumn; + } + + public void setColumnExpr(ExprNodeDesc exprColumn) { + this.exprColumn = exprColumn; } public String getTypeName() { @@ -253,4 +264,37 @@ public String toMappingString(String tab, String col) { public void setObjectinspector(ObjectInspector writableObjectInspector) { this.objectInspector = writableObjectInspector; } + + /** + * Test if a column given the table alias and column alias matches the current ColumnInfo + * @param tabAlias + * @param columnAlias + * @return True if both table alias and column alias are the same + */ + public boolean match(String tabAlias, String columnAlias) { + String columnName = this.alias; + if (exprColumn != null) { + columnName = exprColumn.getExprString(); + } + + if (this.tabAlias == null) { + if (tabAlias == null) { + if(columnName != null && columnAlias != null && + columnName.equals(columnAlias)) { + return true; + } + } + } + else { + if (tabAlias != null) { + if (this.tabAlias.equals(tabAlias) && + columnName != null && columnAlias != 
null && + columnName.equals(columnAlias)) { + return true; + } + } + } + + return false; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java index 1acb3b3..84f7c88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java @@ -61,24 +61,17 @@ public ColumnInfo getColumnInfo(String internalName) { return null; } + /** + * Given table alias and column alias, match them in the current RowSchema. + * + * @param tabAlias + * @param alias + * @return + */ public ColumnInfo getColumnInfo(String tabAlias, String alias) { for (ColumnInfo columnInfo: this.signature) { - if (columnInfo.getTabAlias() == null) { - if (tabAlias == null) { - if(columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { - return columnInfo; - } - } - } - else { - if (tabAlias != null) { - if (columnInfo.getTabAlias().equals(tabAlias) && - columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { - return columnInfo; - } - } + if (columnInfo.match(tabAlias, alias) ) { + return columnInfo; } } return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 1814550..e10dd9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -73,15 +73,15 @@ public ConstantPropagateProcCtx(ConstantPropagateOption option) { } /** - * Resolve a ColumnInfo based on given RowResolver. + * Resolve a given ColumnInfo to the ColumnInfo in the given RowSchema. 
 * * @param ci - * @param rr - * @param parentRR + * @param rs + * @param parentRS * @return * @throws SemanticException */ - private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { + private static ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { // Resolve new ColumnInfo from String alias = ci.getAlias(); if (alias == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 06db7f9..b45de95 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -71,7 +71,6 @@ import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryState; @@ -4160,6 +4159,11 @@ static boolean isRegex(String pattern, HiveConf conf) { ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), exp.getWritableObjectInspector(), tabAlias, false); + // Set the original column expression for non-CBO. CBO does the optimization + // on the original AST, so exp may be the optimized expression, not the original one; don't set it. + if (!isCBOExecuted()) { + colInfo.setColumnExpr(exp); + } colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp) .isSkewedCol() : false); out_rwsch.put(tabAlias, colAlias, colInfo); diff --git a/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q new file mode 100644 index 0000000..4e49998 --- /dev/null +++ b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q @@ -0,0 +1,14 @@ +set hive.cbo.enable=false; +set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string); +create table t2(p1 double, c2 string); +insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0); +insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0); +explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; + +select * from t2; + diff --git a/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out new file mode 100644 index 0000000..aa4ac98 --- /dev/null +++ b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2(p1 double, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(p1 double, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +POSTHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 
+ Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c1 = 'c1') (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(if((c2 = 0.0), c2, '3.0')) (type: double), '40' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@p1=40/p2=p2 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@p1=40/p2=p2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c2 SIMPLE [(t1)t1.FieldSchema(name:p1, type:string, comment:null), ] +POSTHOOK: Lineage: t2.p1 EXPRESSION [(t1)t1.FieldSchema(name:c2, type:double, comment:null), ] +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 40