diff --git data/files/inventory data/files/inventory new file mode 100644 index 0000000..49fd58a --- /dev/null +++ data/files/inventory @@ -0,0 +1,100 @@ +2450815|1|1|211| +2450815|2|1|235| +2450815|4|1|859| +2450815|7|1|704| +2450815|8|1|891| +2450815|10|1|834| +2450815|13|1|591| +2450815|14|1|579| +2450815|16|1|622| +2450815|19|1|745| +2450815|20|1|405| +2450815|22|1|| +2450815|25|1|676| +2450815|26|1|865| +2450815|28|1|24| +2450815|31|1|170| +2450815|32|1|797| +2450815|34|1|332| +2450815|37|1|618| +2450815|38|1|909| +2450815|40|1|738| +2450815|43|1|699| +2450815|44|1|975| +2450815|46|1|551| +2450815|49|1|857| +2450815|50|1|894| +2450815|52|1|279| +2450815|55|1|206| +2450815|56|1|746| +2450815|58|1|| +2450815|61|1|154| +2450815|62|1|888| +2450815|64|1|773| +2450815|67|1|107| +2450815|68|1|801| +2450815|70|1|749| +2450815|73|1|419| +2450815|74|1|541| +2450815|76|1|359| +2450815|79|1|578| +2450815|80|1|42| +2450815|82|1|83| +2450815|85|1|192| +2450815|86|1|567| +2450815|88|1|710| +2450815|91|1|282| +2450815|92|1|631| +2450815|94|1|587| +2450815|97|1|645| +2450815|98|1|667| +2450815|100|1|26| +2450815|103|1|224| +2450815|104|1|901| +2450815|106|1|887| +2450815|109|1|455| +2450815|110|1|784| +2450815|112|1|742| +2450815|115|1|978| +2450815|116|1|207| +2450815|118|1|462| +2450815|121|1|406| +2450815|122|1|559| +2450815|124|1|999| +2450815|127|1|870| +2450815|128|1|982| +2450815|130|1|700| +2450815|133|1|26| +2450815|134|1|356| +2450815|136|1|143| +2450815|139|1|838| +2450815|140|1|846| +2450815|142|1|657| +2450815|145|1|181| +2450815|146|1|730| +2450815|148|1|49| +2450815|151|1|652| +2450815|152|1|861| +2450815|154|1|329| +2450815|157|1|286| +2450815|158|1|| +2450815|160|1|62| +2450815|163|1|894| +2450815|164|1|463| +2450815|166|1|458| +2450815|169|1|696| +2450815|170|1|450| +2450815|172|1|842| +2450815|175|1|79| +2450815|176|1|260| +2450815|178|1|894| +2450815|181|1|4| +2450815|182|1|281| +2450815|184|1|797| +2450815|187|1|981| +2450815|188|1|58| +2450815|190|1|834| 
+2450815|193|1|54| +2450815|194|1|856| +2450815|196|1|803| +2450815|199|1|296| diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 115d2cc..8f4344a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -205,6 +205,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_mapjoin_reduce.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ + vector_partition_diff_num_cols.q,\ vector_partitioned_date_time.q,\ vector_reduce_groupby_decimal.q,\ vector_string_concat.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2c02bd4..c8e6ef5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -27,6 +27,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.Stack; import java.util.TreeMap; @@ -35,6 +36,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -115,16 +117,11 @@ import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.util.ReflectionUtils; public class Vectorizer implements PhysicalPlanResolver { @@ -345,7 +342,7 @@ private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticE } } Map opRules = new LinkedHashMap(); - MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(isTez); + MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez); addMapWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -515,9 +512,11 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException class MapWorkValidationNodeProcessor implements NodeProcessor { + private MapWork mapWork; private boolean isTez; - public MapWorkValidationNodeProcessor(boolean isTez) { + public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) { + this.mapWork = mapWork; this.isTez = isTez; } @@ -529,7 +528,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (nonVectorizableChildOfGroupBy(op)) { return new Boolean(true); } - boolean ret = validateMapWorkOperator(op, isTez); + boolean ret = validateMapWorkOperator(op, mapWork, isTez); if (!ret) { LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); @@ -845,7 +844,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { return pctx; } - boolean validateMapWorkOperator(Operator op, boolean isTez) { + boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { boolean ret = false; switch (op.getType()) { case MAPJOIN: @@ -868,7 +867,7 @@ boolean validateMapWorkOperator(Operator op, boolean isT ret = 
validateReduceSinkOperator((ReduceSinkOperator) op); break; case TABLESCAN: - ret = validateTableScanOperator((TableScanOperator) op); + ret = validateTableScanOperator((TableScanOperator) op, mWork); break; case FILESINK: case LIMIT: @@ -950,9 +949,72 @@ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) { return validateMapJoinDesc(desc); } - private boolean validateTableScanOperator(TableScanOperator op) { + private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) { TableScanDesc desc = op.getConf(); - return !desc.isGatherStats(); + if (desc.isGatherStats()) { + return false; + } + + String columns = ""; + String types = ""; + String partitionColumns = ""; + String partitionTypes = ""; + boolean haveInfo = false; + + // This over-reaches slightly, since we can have > 1 table-scan per map-work. + // It needs path to partition, path to alias, then check the alias == the same table-scan, to be accurate. + // That said, that is a TODO item to be fixed when we support >1 TableScans per vectorized pipeline later. + LinkedHashMap partitionDescs = mWork.getPathToPartitionInfo(); + + // For vectorization, compare each partition's information against the others. + // We assume the table information will be from one of the partitions, so it will + // work to focus on the partition information and not compare against the TableScanOperator + // columns (in the VectorizationContext).... + for (Map.Entry entry : partitionDescs.entrySet()) { + PartitionDesc partDesc = entry.getValue(); + if (partDesc.getPartSpec() == null || partDesc.getPartSpec().isEmpty()) { + // No partition information -- we match because we would default to using the table description. 
+ continue; + } + Properties partProps = partDesc.getProperties(); + if (!haveInfo) { + columns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS); + types = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES); + partitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + partitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); + haveInfo = true; + } else { + String nextColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS); + String nextTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES); + String nextPartitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + String nextPartitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); + if (!columns.equalsIgnoreCase(nextColumns)) { + LOG.info( + String.format("Could not vectorize partition %s. Its column names %s do not match the other column names %s", + entry.getKey(), nextColumns, columns)); + return false; + } + if (!types.equalsIgnoreCase(nextTypes)) { + LOG.info( + String.format("Could not vectorize partition %s. Its column types %s do not match the other column types %s", + entry.getKey(), nextTypes, types)); + return false; + } + if (!partitionColumns.equalsIgnoreCase(nextPartitionColumns)) { + LOG.info( + String.format("Could not vectorize partition %s. Its partition column names %s do not match the other partition column names %s", + entry.getKey(), nextPartitionColumns, partitionColumns)); + return false; + } + if (!partitionTypes.equalsIgnoreCase(nextPartitionTypes)) { + LOG.info( + String.format("Could not vectorize partition %s. 
Its partition column types %s do not match the other partition column types %s", + entry.getKey(), nextPartitionTypes, partitionTypes)); + return false; + } + } + } + return true; } private boolean validateMapJoinOperator(MapJoinOperator op) { diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 3271189..ec47c08 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -108,7 +108,7 @@ public void testAggregateOnUDF() throws HiveException { gbyOp.setConf(desc); Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateMapWorkOperator(gbyOp, false)); + Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; @@ -188,7 +188,7 @@ public void testValidateMapJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } @@ -204,6 +204,6 @@ public void testValidateSMBJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } } diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q new file mode 100644 index 0000000..30ea590 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q @@ -0,0 +1,99 @@ +SET 
hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=minimal; + +create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt; + +-- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_0 partition(par='1') select * from inventory_txt; +insert into table inventory_part_0 partition(par='2') select * from inventory_txt; + +explain +select sum(inv_quantity_on_hand) from inventory_part_0; + +select sum(inv_quantity_on_hand) from inventory_part_0; + +-- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt; + +alter table inventory_part_1 add columns (fifthcol string); + +insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt; + +explain +select sum(inv_quantity_on_hand) from inventory_part_1; + +select sum(inv_quantity_on_hand) from inventory_part_1; + +-- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. 
+ +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_2a partition(par='1') select * from inventory_txt; +insert into table inventory_part_2a partition(par='2') select * from inventory_txt; +alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int; + +explain +select sum(inv_quantity_on_hand) from inventory_part_2a; + +create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc; + +insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt; +insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt; +alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int; + +explain +select sum(inv_quantity_on_hand) from inventory_part_2b; + +-- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. 
+ +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_3 partition(par='1') select * from inventory_txt; +insert into table inventory_part_3 partition(par='2') select * from inventory_txt; +alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint; + +explain +select sum(inv_quantity_on_hand) from inventory_part_3; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out new file mode 100644 index 0000000..7d860f7 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out @@ -0,0 +1,630 @@ +PREHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + 
inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_0 +POSTHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_0 +PREHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=2 +POSTHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: 
default@inventory_part_0@par=2 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory_part_0 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_0 +PREHOOK: Input: default@inventory_part_0@par=1 +PREHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_0 +POSTHOOK: Input: default@inventory_part_0@par=1 +POSTHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: -- Additional column for 2nd partition... 
+ +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: 
default@inventory_part_1@par=5cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=5cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).fifthcol SIMPLE [] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory_part_1 + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Input: default@inventory_part_1@par=4cols +PREHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Input: default@inventory_part_1@par=4cols +POSTHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. 
+ +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2a +POSTHOOK: query: -- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_2a +PREHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: 
default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2a +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2a +POSTHOOK: Input: default@inventory_part_2a@par=2 +POSTHOOK: Output: default@inventory_part_2a@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory_part_2a + Statistics: Num rows: 200 
Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2b +POSTHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@inventory_part_2b +PREHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: 
Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2b +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2b +POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory_part_2b + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_3 +POSTHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. 
+ +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_3 +PREHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 
PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_3 +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_3 +POSTHOOK: Input: default@inventory_part_3@par=2 +POSTHOOK: Output: default@inventory_part_3@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: inventory_part_3 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out new file mode 100644 index 0000000..33307de --- /dev/null +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -0,0 +1,595 @@ +PREHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +PREHOOK: type: LOAD +#### A masked 
pattern was here #### +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_0 +POSTHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_0 +PREHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] 
+PREHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=2 +POSTHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_0@par=2 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_0 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_0 +PREHOOK: Input: default@inventory_part_0@par=1 +PREHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_0 +POSTHOOK: Input: default@inventory_part_0@par=1 +POSTHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: -- Additional column for 2nd partition... 
+ +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: 
default@inventory_part_1@par=5cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=5cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).fifthcol SIMPLE [] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_1 + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Input: default@inventory_part_1@par=4cols +PREHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Input: default@inventory_part_1@par=4cols +POSTHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2a +POSTHOOK: query: -- Verify we do not vectorize when a partition column name is different. 
+-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_2a +PREHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_date_sk SIMPLE 
[(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2a +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2a +POSTHOOK: Input: default@inventory_part_2a@par=2 +POSTHOOK: Output: default@inventory_part_2a@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_2a + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2b +POSTHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_2b +PREHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: Lineage: 
inventory_part_2b PARTITION(par1=1,par2=4).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +PREHOOK: type: 
ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2b +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2b +POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_2b + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_3 +POSTHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_3 +PREHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 
PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_3 +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_3 +POSTHOOK: Input: default@inventory_part_3@par=2 +POSTHOOK: Output: default@inventory_part_3@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) 
from inventory_part_3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_3 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +