diff --git data/files/inventory data/files/inventory new file mode 100644 index 0000000..49fd58a --- /dev/null +++ data/files/inventory @@ -0,0 +1,100 @@ +2450815|1|1|211| +2450815|2|1|235| +2450815|4|1|859| +2450815|7|1|704| +2450815|8|1|891| +2450815|10|1|834| +2450815|13|1|591| +2450815|14|1|579| +2450815|16|1|622| +2450815|19|1|745| +2450815|20|1|405| +2450815|22|1|| +2450815|25|1|676| +2450815|26|1|865| +2450815|28|1|24| +2450815|31|1|170| +2450815|32|1|797| +2450815|34|1|332| +2450815|37|1|618| +2450815|38|1|909| +2450815|40|1|738| +2450815|43|1|699| +2450815|44|1|975| +2450815|46|1|551| +2450815|49|1|857| +2450815|50|1|894| +2450815|52|1|279| +2450815|55|1|206| +2450815|56|1|746| +2450815|58|1|| +2450815|61|1|154| +2450815|62|1|888| +2450815|64|1|773| +2450815|67|1|107| +2450815|68|1|801| +2450815|70|1|749| +2450815|73|1|419| +2450815|74|1|541| +2450815|76|1|359| +2450815|79|1|578| +2450815|80|1|42| +2450815|82|1|83| +2450815|85|1|192| +2450815|86|1|567| +2450815|88|1|710| +2450815|91|1|282| +2450815|92|1|631| +2450815|94|1|587| +2450815|97|1|645| +2450815|98|1|667| +2450815|100|1|26| +2450815|103|1|224| +2450815|104|1|901| +2450815|106|1|887| +2450815|109|1|455| +2450815|110|1|784| +2450815|112|1|742| +2450815|115|1|978| +2450815|116|1|207| +2450815|118|1|462| +2450815|121|1|406| +2450815|122|1|559| +2450815|124|1|999| +2450815|127|1|870| +2450815|128|1|982| +2450815|130|1|700| +2450815|133|1|26| +2450815|134|1|356| +2450815|136|1|143| +2450815|139|1|838| +2450815|140|1|846| +2450815|142|1|657| +2450815|145|1|181| +2450815|146|1|730| +2450815|148|1|49| +2450815|151|1|652| +2450815|152|1|861| +2450815|154|1|329| +2450815|157|1|286| +2450815|158|1|| +2450815|160|1|62| +2450815|163|1|894| +2450815|164|1|463| +2450815|166|1|458| +2450815|169|1|696| +2450815|170|1|450| +2450815|172|1|842| +2450815|175|1|79| +2450815|176|1|260| +2450815|178|1|894| +2450815|181|1|4| +2450815|182|1|281| +2450815|184|1|797| +2450815|187|1|981| +2450815|188|1|58| +2450815|190|1|834| +2450815|193|1|54| +2450815|194|1|856| +2450815|196|1|803| +2450815|199|1|296| diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index fc8c8cc..318f0be 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -202,6 +202,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_mapjoin_reduce.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ + vector_partition_diff_num_cols.q,\ vector_partitioned_date_time.q,\ vector_reduce_groupby_decimal.q,\ vector_string_concat.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index dbc9bb2..156bff6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -126,6 +126,7 @@ VectorExpressionDescriptor vMap; + private List initialColumnTypes; private List projectedColumns; private List projectionColumnNames; private Map projectionColumnMap; @@ -139,6 +140,7 @@ public VectorizationContext(List initialColumnNames) { this.projectionColumnNames = initialColumnNames; + initialColumnTypes = null; projectedColumns = new ArrayList(); projectionColumnMap = new HashMap(); for (int i = 0; i < this.projectionColumnNames.size(); i++) { @@ -154,6 +156,7 @@ public VectorizationContext(List initialColumnNames) { // Constructor to with the individual addInitialColumn method // followed by a call to finishedAddingInitialColumns. public VectorizationContext() { + initialColumnTypes = new ArrayList(); projectedColumns = new ArrayList(); projectionColumnNames = new ArrayList(); projectionColumnMap = new HashMap(); @@ -166,6 +169,7 @@ public VectorizationContext() { // Use with resetProjectionColumns and addProjectionColumn. // Keeps existing output column map, etc. public VectorizationContext(VectorizationContext vContext) { + initialColumnTypes = vContext.initialColumnTypes; this.projectedColumns = new ArrayList(); this.projectionColumnNames = new ArrayList(); this.projectionColumnMap = new HashMap(); @@ -177,7 +181,16 @@ public VectorizationContext(VectorizationContext vContext) { // Add an initial column to a vectorization context when // a vectorized row batch is being created. + public void addInitialColumn(String columnName, String typeName) { + initialColumnTypes.add(typeName); + int index = projectedColumns.size(); + projectedColumns.add(index); + projectionColumnNames.add(columnName); + projectionColumnMap.put(columnName, index); + } + public void addInitialColumn(String columnName) { + initialColumnTypes.add(""); int index = projectedColumns.size(); projectedColumns.add(index); projectionColumnNames.add(columnName); @@ -206,6 +219,10 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) { projectionColumnMap.put(columnName, vectorBatchColIndex); } + public List getInitialColumnTypes() { + return initialColumnTypes; + } + public List getProjectedColumns() { return projectedColumns; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2c02bd4..6c63153 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -27,6 +27,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.Stack; import java.util.TreeMap; @@ -35,6 +36,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -72,6 +74,7 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; @@ -345,7 +348,7 @@ private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticE } } Map opRules = new LinkedHashMap(); - MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(isTez); + MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez); addMapWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -515,9 +518,11 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException class MapWorkValidationNodeProcessor implements NodeProcessor { + private MapWork mapWork; private boolean isTez; - public MapWorkValidationNodeProcessor(boolean isTez) { + public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) { + this.mapWork = mapWork; this.isTez = isTez; } @@ -529,7 +534,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (nonVectorizableChildOfGroupBy(op)) { return new Boolean(true); } - boolean ret = validateMapWorkOperator(op, isTez); + boolean ret = validateMapWorkOperator(op, mapWork, isTez); if (!ret) { LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); @@ -845,7 +850,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { return pctx; } - boolean validateMapWorkOperator(Operator op, boolean isTez) { + boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { boolean ret = false; switch (op.getType()) { case MAPJOIN: @@ -868,7 +873,7 @@ boolean validateMapWorkOperator(Operator op, boolean isT ret = validateReduceSinkOperator((ReduceSinkOperator) op); break; case TABLESCAN: - ret = validateTableScanOperator((TableScanOperator) op); + ret = validateTableScanOperator((TableScanOperator) op, mWork); break; case FILESINK: case LIMIT: @@ -950,9 +955,71 @@ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) { return validateMapJoinDesc(desc); } - private boolean validateTableScanOperator(TableScanOperator op) { + private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) { TableScanDesc desc = op.getConf(); - return !desc.isGatherStats(); + if (desc.isGatherStats()) { + return false; + } + HiveConf hiveConf = physicalContext.getConf(); + VectorizationContext vContext = getVectorizationContext(op, physicalContext); + List columnNames = vContext.getProjectionColumnNames(); + List columnTypes = vContext.getInitialColumnTypes(); + LinkedHashMap partitionDescs = mWork.getPathToPartitionInfo(); + for (Map.Entry entry : partitionDescs.entrySet()) { + String partKey = entry.getKey(); + PartitionDesc part = entry.getValue(); + Properties partProps = + (part.getPartSpec() == null || part.getPartSpec().isEmpty()) ? + part.getTableDesc().getProperties() : part.getProperties(); + Class serdeclass; + Deserializer partDeserializer; + StructObjectInspector oi; + try { + serdeclass = hiveConf.getClassByName(part.getSerdeClassName()); + partDeserializer = (Deserializer) serdeclass.newInstance(); + SerDeUtils.initializeSerDe(partDeserializer, hiveConf, part.getTableDesc().getProperties(), + partProps); + oi = (StructObjectInspector) partDeserializer + .getObjectInspector(); + } catch (Exception e) { + LOG.info("Failed to vectorize", e); + return false; + } + String partCols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + String[] partNames = null; + if (partCols != null && partCols.length() > 0) { + partNames = partCols.trim().split("/"); + } + int partCount = (partNames == null ? 0 : partNames.length); + List fieldRefs = oi.getAllStructFieldRefs(); + int totalCols = fieldRefs.size() + partCount; + if (totalCols != columnNames.size()) { + LOG.info( + String.format("Could not vectorize because input %s has a different number of physical columns (TableScanOperator column count including partitions: %d; input table column count + partition count %d + %d = %d)", + partKey, columnNames.size(), fieldRefs.size(), partCount, totalCols)); + return false; + } + for (int i = 0; i < fieldRefs.size(); i++) { + StructField field = fieldRefs.get(i); + String fieldName = field.getFieldName(); + String columnName = columnNames.get(i); + if (!fieldName.equalsIgnoreCase(columnName)) { + LOG.info( + String.format("Could not vectorize because input %s has a different column name for column #%d (table column name: %s; partition column name: %s)", + partKey, i, columnName, fieldName)); + return false; + } + String fieldType = field.getFieldObjectInspector().getTypeName(); + String columnType = columnTypes.get(i); + if (!fieldType.equalsIgnoreCase(columnType)) { + LOG.info( + String.format("Could not vectorize because input %s has a different column type for column #%d (table column type: %s; partition column type: %s)", + partKey, i, columnType, fieldType)); + return false; + } + } + } + return true; } private boolean validateMapJoinOperator(MapJoinOperator op) { @@ -1218,7 +1285,7 @@ private VectorizationContext getVectorizationContext(Operator op, for (ColumnInfo c : rs.getSignature()) { // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560). if (!isVirtualColumn(c)) { - vContext.addInitialColumn(c.getInternalName()); + vContext.addInitialColumn(c.getInternalName(), c.getTypeName()); } } vContext.finishedAddingInitialColumns(); diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 3271189..ec47c08 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -108,7 +108,7 @@ public void testAggregateOnUDF() throws HiveException { gbyOp.setConf(desc); Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateMapWorkOperator(gbyOp, false)); + Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; @@ -188,7 +188,7 @@ public void testValidateMapJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } @@ -204,6 +204,6 @@ public void testValidateSMBJoinOperator() { map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } } diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q new file mode 100644 index 0000000..30ea590 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q @@ -0,0 +1,99 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=minimal; + +create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt; + +-- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_0 partition(par='1') select * from inventory_txt; +insert into table inventory_part_0 partition(par='2') select * from inventory_txt; + +explain +select sum(inv_quantity_on_hand) from inventory_part_0; + +select sum(inv_quantity_on_hand) from inventory_part_0; + +-- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt; + +alter table inventory_part_1 add columns (fifthcol string); + +insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt; + +explain +select sum(inv_quantity_on_hand) from inventory_part_1; + +select sum(inv_quantity_on_hand) from inventory_part_1; + +-- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_2a partition(par='1') select * from inventory_txt; +insert into table inventory_part_2a partition(par='2') select * from inventory_txt; +alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int; + +explain +select sum(inv_quantity_on_hand) from inventory_part_2a; + +create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc; + +insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt; +insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt; +alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int; + +explain +select sum(inv_quantity_on_hand) from inventory_part_2b; + +-- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc; + +insert into table inventory_part_3 partition(par='1') select * from inventory_txt; +insert into table inventory_part_3 partition(par='2') select * from inventory_txt; +alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint; + +explain +select sum(inv_quantity_on_hand) from inventory_part_3; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out new file mode 100644 index 0000000..33307de --- /dev/null +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -0,0 +1,595 @@ +PREHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: create table inventory_txt +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@inventory_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/inventory' OVERWRITE INTO TABLE inventory_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@inventory_txt +PREHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_0 +POSTHOOK: query: -- No column change case + +create table inventory_part_0( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_0 +PREHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: query: insert into table inventory_part_0 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_0@par=1 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_0@par=2 +POSTHOOK: query: insert into table inventory_part_0 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_0@par=2 +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_0 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_0 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_0 +PREHOOK: Input: default@inventory_part_0@par=1 +PREHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_0 +POSTHOOK: Input: default@inventory_part_0@par=1 +POSTHOOK: Input: default@inventory_part_0@par=2 +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: -- Additional column for 2nd partition... + +create table inventory_part_1( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='4cols') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=4cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=4cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Output: default@inventory_part_1 +POSTHOOK: query: alter table inventory_part_1 add columns (fifthcol string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Output: default@inventory_part_1 +PREHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_1@par=5cols +POSTHOOK: query: insert into table inventory_part_1 partition(par='5cols') select *, '5th' as fifthcol from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_1@par=5cols +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).fifthcol SIMPLE [] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_1 PARTITION(par=5cols).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_1 + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 11876 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_part_1 +PREHOOK: Input: default@inventory_part_1@par=4cols +PREHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +POSTHOOK: query: select sum(inv_quantity_on_hand) from inventory_part_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_part_1 +POSTHOOK: Input: default@inventory_part_1@par=4cols +POSTHOOK: Input: default@inventory_part_1@par=5cols +#### A masked pattern was here #### +105970 +PREHOOK: query: -- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2a +POSTHOOK: query: -- Verify we do not vectorize when a partition column name is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_2a( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_2a +PREHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: query: insert into table inventory_part_2a partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=1 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: insert into table inventory_part_2a partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2a PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2a +PREHOOK: Output: default@inventory_part_2a@par=2 +POSTHOOK: query: alter table inventory_part_2a partition (par='2') change inv_item_sk other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2a +POSTHOOK: Input: default@inventory_part_2a@par=2 +POSTHOOK: Output: default@inventory_part_2a@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_2a + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_2b +POSTHOOK: query: create table inventory_part_2b( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par1 string, par2 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_2b +PREHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='1',par2=4) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=1/par2=4 +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=1,par2=4).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: insert into table inventory_part_2b partition(par1='2',par2=3) select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_2b PARTITION(par1=2,par2=3).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_2b +PREHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: query: alter table inventory_part_2b partition (par1='2',par2=3) change inv_quantity_on_hand other_name int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_2b +POSTHOOK: Input: default@inventory_part_2b@par1=2/par2=3 +POSTHOOK: Output: default@inventory_part_2b@par1=2/par2=3 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_2b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_2b + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inventory_part_3 +POSTHOOK: query: -- Verify we do not vectorize when a partition column type is different. +-- Currently, we do not attempt the actual select because non-vectorized ORC table reader gets a cast exception. + +create table inventory_part_3( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int) +partitioned by (par string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inventory_part_3 +PREHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: query: insert into table inventory_part_3 partition(par='1') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=1 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=1).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@inventory_txt +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: insert into table inventory_part_3 partition(par='2') select * from inventory_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inventory_txt +POSTHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_date_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_item_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_quantity_on_hand SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_quantity_on_hand, type:int, comment:null), ] +POSTHOOK: Lineage: inventory_part_3 PARTITION(par=2).inv_warehouse_sk SIMPLE [(inventory_txt)inventory_txt.FieldSchema(name:inv_warehouse_sk, type:int, comment:null), ] +PREHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@inventory_part_3 +PREHOOK: Output: default@inventory_part_3@par=2 +POSTHOOK: query: alter table inventory_part_3 partition (par='2') change inv_warehouse_sk inv_warehouse_sk bigint +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@inventory_part_3 +POSTHOOK: Input: default@inventory_part_3@par=2 +POSTHOOK: Output: default@inventory_part_3@par=2 +PREHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(inv_quantity_on_hand) from inventory_part_3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory_part_3 + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_quantity_on_hand (type: int) + outputColumnNames: inv_quantity_on_hand + Statistics: Num rows: 200 Data size: 3176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(inv_quantity_on_hand) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out index fe24fd7..b97c169 100644 --- ql/src/test/results/clientpositive/vectorized_context.q.out +++ ql/src/test/results/clientpositive/vectorized_context.q.out @@ -188,7 +188,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Execution mode: vectorized Stage: Stage-0 Fetch Operator