diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java index e3da7f0..a62800d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java @@ -35,30 +35,30 @@ private static final long serialVersionUID = 1L; - private String internalName; + protected String internalName; // The name from the table column for OPs like TS or auto-generated for internal OPs - private String alias = null; // [optional] alias of the column (external name + protected String alias = null; // [optional] alias of the column (external name // as seen by the users) /** * Indicates whether the column is a skewed column. */ - private boolean isSkewedCol; + protected boolean isSkewedCol; /** * Store the alias of the table where available. */ - private String tabAlias; + protected String tabAlias; /** * Indicates whether the column is a virtual column. */ - private boolean isVirtualCol; + protected boolean isVirtualCol; - private ObjectInspector objectInspector; + protected ObjectInspector objectInspector; - private boolean isHiddenVirtualCol; + protected boolean isHiddenVirtualCol; - private String typeName; + protected String typeName; public ColumnInfo() { } @@ -253,4 +253,32 @@ public String toMappingString(String tab, String col) { public void setObjectinspector(ObjectInspector writableObjectInspector) { this.objectInspector = writableObjectInspector; } + + /** + * Test if a column given the table alias and column alias matches the current ColumnInfo + * @param tabAlias + * @param columnAlias + * @return True if both table alias and column alias are the same + */ + public boolean match(String tabAlias, String columnAlias) { + if (this.tabAlias == null) { + if (tabAlias == null) { + if(this.alias != null && columnAlias != null && + this.alias.equals(columnAlias)) { + return true; + } + } + } + else { + if (tabAlias != null) { + if (this.tabAlias.equals(tabAlias) && + this.alias != null && columnAlias != null && + this.alias.equals(columnAlias)) { + return true; + } + } + } + + return false; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/NamedColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/NamedColumnInfo.java new file mode 100644 index 0000000..7fb7c12 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/NamedColumnInfo.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +public class NamedColumnInfo extends ColumnInfo { + + private static final long serialVersionUID = 1L; + private String columnName; + + public NamedColumnInfo(String columnName, + String internalName, + ObjectInspector columnOI, + String tabAlias) { + super(internalName, columnOI, tabAlias, false, false); + if (columnName != null) { + this.columnName = columnName.toLowerCase(); + } + } + + @Override + public String getAlias() { + return alias == null ? columnName : alias; + } + + /** + * In cases the column has both column name and alias such as SEL operator in + * 'select c as calias', 'c' is visible to the parent but not the alias 'calias'. + * So column alias needs to check against column name rather than alias. + * @param tabAlias + * @param columnAlias + * @return True if both table alias and column alias are the same + */ + @Override + public boolean match(String tabAlias, String columnAlias) { + if (this.tabAlias == null) { + if (tabAlias == null) { + if(this.columnName != null && columnAlias != null && + this.columnName.equals(columnAlias)) { + return true; + } + } + } + else { + if (tabAlias != null) { + if (this.tabAlias.equals(tabAlias) && + this.columnName != null && columnAlias != null && + this.columnName.equals(columnAlias)) { + return true; + } + } + } + + return false; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java index 1acb3b3..84f7c88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java @@ -61,24 +61,17 @@ public ColumnInfo getColumnInfo(String internalName) { return null; } + /** + * Given table alias and column alias, match them in the current RowSchema. + * + * @param tabAlias + * @param alias + * @return + */ public ColumnInfo getColumnInfo(String tabAlias, String alias) { for (ColumnInfo columnInfo: this.signature) { - if (columnInfo.getTabAlias() == null) { - if (tabAlias == null) { - if(columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { - return columnInfo; - } - } - } - else { - if (tabAlias != null) { - if (columnInfo.getTabAlias().equals(tabAlias) && - columnInfo.getAlias() != null && alias != null && - columnInfo.getAlias().equals(alias)) { - return columnInfo; - } - } + if (columnInfo.match(tabAlias, alias) ) { + return columnInfo; } } return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 1814550..e10dd9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -73,15 +73,15 @@ public ConstantPropagateProcCtx(ConstantPropagateOption option) { } /** - * Resolve a ColumnInfo based on given RowResolver. + * Resolve a given ColumnInfo to the ColumnInfo in the given RowSchema. * * @param ci - * @param rr - * @param parentRR + * @param rs + * @param parentRS * @return * @throws SemanticException */ - private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { + private static ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { // Resolve new ColumnInfo from String alias = ci.getAlias(); if (alias == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3e91e10..f575b5e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -72,6 +72,7 @@ import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ArchiveUtils; import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.NamedColumnInfo; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -4105,8 +4106,10 @@ static boolean isRegex(String pattern, HiveConf conf) { } col_list.add(exp); - ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), - exp.getWritableObjectInspector(), tabAlias, false); + NamedColumnInfo colInfo = new NamedColumnInfo( + getColumnName(exp, colAlias), + getColumnInternalName(pos), + exp.getWritableObjectInspector(), tabAlias); colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp) .isSkewedCol() : false); out_rwsch.put(tabAlias, colAlias, colInfo); @@ -4248,6 +4251,7 @@ public RowResolver handleInsertStatementSpec(List col_list, String col_list.addAll(new_col_list); return newOutputRR; } + String recommendName(ExprNodeDesc exp, String colAlias) { if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { return null; @@ -4259,6 +4263,26 @@ String recommendName(ExprNodeDesc exp, String colAlias) { return null; } + /** + * Gets the column name from the original query for the column or the alias if + * the column is a function. + * @param exp + * @param colAlias + * @return + */ + private String getColumnName(ExprNodeDesc exp, String colAlias) { + String columnName = colAlias != null ? colAlias : null; + if (exp instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc)exp; + String column = columnDesc.getColumn(); + if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) { + columnName = column; + } + } + + return columnName; + } + String getAutogenColAliasPrfxLbl() { return this.autogenColAliasPrfxLbl; } diff --git a/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q new file mode 100644 index 0000000..4e49998 --- /dev/null +++ b/ql/src/test/queries/clientpositive/constantPropagateForInsertSelect.q @@ -0,0 +1,14 @@ +set hive.cbo.enable=false; +set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string); +create table t2(p1 double, c2 string); +insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0); +insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0); +explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40'; + +select * from t2; + diff --git a/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out new file mode 100644 index 0000000..aa4ac98 --- /dev/null +++ b/ql/src/test/results/clientpositive/constantPropagateForInsertSelect.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table t1(c1 string, c2 double) partitioned by (p1 string, p2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2(p1 double, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(p1 double, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='p1', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t1@p1=p1/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=p1,p2=p2).c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: query: insert into table t1 partition(p1='40', p2='p2') values('c1', 0.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t1@p1=40/p2=p2 +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t1 PARTITION(p1=40,p2=p2).c2 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +POSTHOOK: query: explain +INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c1 = 'c1') (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToDouble(if((c2 = 0.0), c2, '3.0')) (type: double), '40' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@p1=40/p2=p2 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE t2 select if((c2 = 0.0), c2, '3.0') as p1, p1 as p2 from t1 where c1 = 'c1' and p1 = '40' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@p1=40/p2=p2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c2 SIMPLE [(t1)t1.FieldSchema(name:p1, type:string, comment:null), ] +POSTHOOK: Lineage: t2.p1 EXPRESSION [(t1)t1.FieldSchema(name:c2, type:double, comment:null), ] +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 40