diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 46350a3..81da19f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -411,7 +411,8 @@ minitez.query.files.shared=acid_globallimit.q,\ union_type_chk.q -minitez.query.files=bucket_map_join_tez1.q,\ +minitez.query.files=acid_vectorization_missing_cols.q,\ + bucket_map_join_tez1.q,\ smb_cache.q,\ bucket_map_join_tez2.q,\ constprog_dpp.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 774cc2b..53f3b05 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -4282,12 +4282,13 @@ public RowResolver handleInsertStatementSpec(List col_list, String "No table/partition found in QB metadata for dest='" + dest + "'")); } ArrayList new_col_list = new ArrayList(); - ArrayList newSchema = new ArrayList(); colListPos = 0; List targetTableCols = target != null ? target.getCols() : partition.getCols(); List targetTableColNames = new ArrayList(); + List targetTableColTypes = new ArrayList(); for(FieldSchema fs : targetTableCols) { targetTableColNames.add(fs.getName()); + targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); } Map partSpec = qb.getMetaData().getPartSpecForAlias(dest); if(partSpec != null) { @@ -4296,13 +4297,15 @@ public RowResolver handleInsertStatementSpec(List col_list, String for(Map.Entry partKeyVal : partSpec.entrySet()) { if (partKeyVal.getValue() == null) { targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols + targetTableColTypes.add(TypeInfoFactory.stringTypeInfo); } } } RowResolver newOutputRR = new RowResolver(); //now make the select produce , with //where missing columns are NULL-filled - for(String f : targetTableColNames) { + for (int i = 0; i < targetTableColNames.size(); i++) { + String f = targetTableColNames.get(i); if(targetCol2Projection.containsKey(f)) { //put existing column in new list to make sure it is in the right position new_col_list.add(targetCol2Projection.get(f)); @@ -4312,10 +4315,7 @@ public RowResolver handleInsertStatementSpec(List col_list, String } else { //add new 'synthetic' columns for projections not provided by Select - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - CommonToken t = new CommonToken(HiveParser.TOK_NULL); - t.setText("TOK_NULL"); - ExprNodeDesc exp = genExprNodeDesc(new ASTNode(t), inputRR, tcCtx); + ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null); new_col_list.add(exp); final String tableAlias = null;//this column doesn't come from any table ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), diff --git ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q new file mode 100644 index 0000000..85e0bb1 --- /dev/null +++ ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q @@ -0,0 +1,21 @@ +set hive.vectorized.execution.enabled=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop table if exists newtable; +create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile; + +drop table if exists newtable_acid; +create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true'); + +insert into newtable_acid(a,b,c) +select * from newtable; diff --git ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out new file mode 100644 index 0000000..6acc6e3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: drop table if exists newtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists newtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@newtable +POSTHOOK: query: create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@newtable +PREHOOK: query: drop table if exists newtable_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists newtable_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@newtable_acid +POSTHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@newtable_acid +PREHOOK: query: insert into newtable_acid(a,b,c) +select * from newtable +PREHOOK: type: QUERY +PREHOOK: Input: default@newtable +PREHOOK: Output: default@newtable_acid +POSTHOOK: query: insert into newtable_acid(a,b,c) +select * from newtable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtable +POSTHOOK: Output: default@newtable_acid +POSTHOOK: Lineage: newtable_acid.a EXPRESSION [(newtable)newtable.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: newtable_acid.b SIMPLE [(newtable)newtable.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: newtable_acid.c EXPRESSION [(newtable)newtable.FieldSchema(name:c, type:double, comment:null), ] +POSTHOOK: Lineage: newtable_acid.d SIMPLE []