diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 47a1107..b3a901e 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -221,6 +221,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_elt.q,\ vector_groupby_3.q,\ vector_groupby_reduce.q,\ + vector_groupby_reduce2.q,\ vector_grouping_sets.q,\ vector_if_expr.q,\ vector_inner_join.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 39a83e3..917f406 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -675,7 +675,7 @@ public void close(boolean aborted) throws HiveException { * writeGroupRow does this and finally increments outputBatch.size. * */ - private class ProcessingModeGroupBatches extends ProcessingModeBase { + private class ProcessingModeReduceMergePartialKeys extends ProcessingModeBase { private boolean inGroup; private boolean first; @@ -761,7 +761,8 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf) aggregators = new VectorAggregateExpression[aggrDesc.size()]; for (int i = 0; i < aggrDesc.size(); ++i) { AggregationDesc aggDesc = aggrDesc.get(i); - aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduce()); + aggregators[i] = + vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduceMergePartial()); } isVectorOutput = desc.getVectorDesc().isVectorOutput(); @@ -803,8 +804,8 @@ public VectorGroupByOperator() { objectInspectors.add(aggregators[i].getOutputObjectInspector()); } - if (!conf.getVectorDesc().isVectorGroupBatches()) { - // These data structures are only used by the map-side processing modes. + if (outputKeyLength > 0 && !conf.getVectorDesc().isReduceMergePartial()) { + // These data structures are only used by processing modes other than Reduce Merge-Partial Keys. keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); aggregationBatchInfo = new VectorAggregationBufferBatch(); aggregationBatchInfo.compileAggregationBatchInfo(aggregators); @@ -830,10 +831,11 @@ public VectorGroupByOperator() { forwardCache = new Object[outputKeyLength + aggregators.length]; if (outputKeyLength == 0) { - processingMode = this.new ProcessingModeGlobalAggregate(); - } else if (conf.getVectorDesc().isVectorGroupBatches()) { + // Hash and MergePartial global aggregation are both handled here. + processingMode = this.new ProcessingModeGlobalAggregate(); + } else if (conf.getVectorDesc().isReduceMergePartial()) { // Sorted GroupBy of vector batches where an individual batch has the same group key (e.g. reduce). - processingMode = this.new ProcessingModeGroupBatches(); + processingMode = this.new ProcessingModeReduceMergePartialKeys(); } else { // We start in hash mode and may dynamically switch to unsorted stream mode.
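// (Added note, an assumption rather than part of this patch: the "unsorted stream mode" switch is believed to be driven by ProcessingModeHashAggregate when the hash table stops reducing the input enough or memory pressure builds; the exact criteria live in that class below.)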
processingMode = this.new ProcessingModeHashAggregate(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 392e56d..8fbf064 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2138,7 +2138,7 @@ public static String mapTypeNameSynonyms(String typeName) { add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); }}; - public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce) + public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduceMergePartial) throws HiveException { ArrayList<ExprNodeDesc> paramDescList = desc.getParameters(); @@ -2166,11 +2166,11 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b inputType == VectorExpressionDescriptor.ArgumentType.NONE) || (aggDef.getType().isSameTypeOrFamily(inputType)))) { - if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduce) { - continue; - } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduce) { - continue; - } + if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduceMergePartial) { + continue; + } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduceMergePartial) { + continue; + } Class aggClass = aggDef.getAggClass(); try @@ -2189,7 +2189,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b } throw new HiveException("Vector aggregate not implemented: \"" + aggregateName + - "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")"); + "\" for type: \"" + inputType.name() + "\" (reduce-merge-partial = " + isReduceMergePartial + ")"); } public int firstOutputColumnIndex() { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e7b9c73..d881127 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -837,11 +837,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } - if (vectorOp instanceof VectorGroupByOperator) { - VectorGroupByOperator groupBy = (VectorGroupByOperator) vectorOp; - VectorGroupByDesc vectorDesc = groupBy.getConf().getVectorDesc(); - vectorDesc.setVectorGroupBatches(true); - } if (saveRootVectorOp && op != vectorOp) { rootVectorOp = vectorOp; } @@ -1127,42 +1122,67 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo LOG.info("Cannot vectorize groupby key expression"); return false; } - ret = validateAggregationDesc(desc.getAggregators(), isReduce); - if (!ret) { - LOG.info("Cannot vectorize groupby aggregate expression"); - return false; - } - if (isReduce) { - if (desc.isDistinct()) { - LOG.info("Distinct not supported in reduce vector mode"); - return false; - } - // Sort-based GroupBy?
- if (desc.getMode() != GroupByDesc.Mode.COMPLETE && - desc.getMode() != GroupByDesc.Mode.PARTIAL1 && - desc.getMode() != GroupByDesc.Mode.PARTIAL2 && - desc.getMode() != GroupByDesc.Mode.MERGEPARTIAL) { - LOG.info("Reduce vector mode not supported when input for GROUP BY not sorted"); - return false; - } - LOG.info("Reduce GROUP BY mode is " + desc.getMode().name()); - if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), isReduce)) { - LOG.info("Reduce vector mode only supported when aggregate outputs are primitive types"); + + boolean isMergePartial = (desc.getMode() != GroupByDesc.Mode.HASH); + + if (!isReduce) { + + // MapWork + + ret = validateHashAggregationDesc(desc.getAggregators()); + if (!ret) { return false; } - if (desc.getKeys().size() > 0) { - if (op.getParentOperators().size() > 0) { - LOG.info("Reduce vector mode can only handle a key group GROUP BY operator when it is fed by reduce-shuffle"); + } else { + + // ReduceWork + + if (isMergePartial) { + + // Reduce Merge-Partial GROUP BY. + + // A merge-partial GROUP BY is fed rows already grouped by key by reduce-shuffle. It is + // the first (or root) operator of its reduce task. + + if (desc.isDistinct()) { + LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT"); return false; } - LOG.info("Reduce-side GROUP BY will process key groups"); - vectorDesc.setVectorGroupBatches(true); + + boolean hasKeys = (desc.getKeys().size() > 0); + + // Do we support merge-partial aggregation and, when there are keys, primitive aggregate outputs? + ret = validateReduceMergePartialAggregationDesc(desc.getAggregators(), hasKeys); + if (!ret) { + return false; + } + + if (hasKeys) { + if (op.getParentOperators().size() > 0) { + LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle"); + return false; + } + + LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups"); + + // Primitive output validation above means we can output VectorizedRowBatch to the + // children operators. + vectorDesc.setVectorOutput(true); + } else { + LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation"); + } + vectorDesc.setIsReduceMergePartial(true); } else { - LOG.info("Reduce-side GROUP BY will do global aggregation"); + + // Reduce Hash GROUP BY or global aggregation.
+ + ret = validateHashAggregationDesc(desc.getAggregators()); + if (!ret) { + return false; + } } - vectorDesc.setVectorOutput(true); - vectorDesc.setIsReduce(true); } + return true; } @@ -1185,9 +1205,18 @@ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs, return true; } - private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduce) { + + private boolean validateHashAggregationDesc(List<AggregationDesc> descs) { + return validateAggregationDesc(descs, /* isReduceMergePartial */ false, false); + } + + private boolean validateReduceMergePartialAggregationDesc(List<AggregationDesc> descs, boolean hasKeys) { + return validateAggregationDesc(descs, /* isReduceMergePartial */ true, hasKeys); + } + + private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduceMergePartial, boolean hasKeys) { for (AggregationDesc d : descs) { - boolean ret = validateAggregationDesc(d, isReduce); + boolean ret = validateAggregationDesc(d, isReduceMergePartial, hasKeys); if (!ret) { return false; } @@ -1264,7 +1293,9 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { } } - private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) { + private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial, + boolean hasKeys) { + String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); @@ -1274,47 +1305,27 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); return false; } - // See if we can vectorize the aggregation. - try { - VectorizationContext vc = new ValidatorVectorizationContext(); - if (vc.getAggregatorExpression(aggDesc, isReduce) == null) { - // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getAggregatorExpression returned null"); - return false; - } - } catch (Exception e) { - LOG.info("Failed to vectorize", e); - return false; - } - return true; - } - private boolean aggregatorsOutputIsPrimitive(List<AggregationDesc> descs, boolean isReduce) { - for (AggregationDesc d : descs) { - boolean ret = aggregatorsOutputIsPrimitive(d, isReduce); - if (!ret) { - return false; - } - } - return true; - } - - private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduce) { + // See if we can vectorize the aggregation. VectorizationContext vc = new ValidatorVectorizationContext(); VectorAggregateExpression vectorAggrExpr; try { - vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduce); + vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial); } catch (Exception e) { // We should have already attempted to vectorize in validateAggregationDesc.
LOG.info("Vectorization of aggreation should have succeeded ", e); return false; } - ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); - if (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE) { - return true; + if (isReduceMergePartial && hasKeys) { + ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); + if (outputObjInspector.getCategory() != ObjectInspector.Category.PRIMITIVE) { + LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); + return false; + } } - return false; + + return true; } private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index b92c38b..7e791f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -30,30 +30,21 @@ private static long serialVersionUID = 1L; - private boolean isReduce; - private boolean isVectorGroupBatches; + private boolean isReduceMergePartial; + private boolean isVectorOutput; public VectorGroupByDesc() { - this.isReduce = false; - this.isVectorGroupBatches = false; + this.isReduceMergePartial = false; this.isVectorOutput = false; } - public boolean isReduce() { - return isReduce; - } - - public void setIsReduce(boolean isReduce) { - this.isReduce = isReduce; - } - - public boolean isVectorGroupBatches() { - return isVectorGroupBatches; + public boolean isReduceMergePartial() { + return isReduceMergePartial; } - public void setVectorGroupBatches(boolean isVectorGroupBatches) { - this.isVectorGroupBatches = isVectorGroupBatches; + public void setIsReduceMergePartial(boolean isReduceMergePartial) { + this.isReduceMergePartial = isReduceMergePartial; } public boolean isVectorOutput() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 8c84f30..fdcf103 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -2268,7 +2268,7 @@ public void testAggregateCountReduceIterable ( GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - vectorDesc.setIsReduce(true); + vectorDesc.setIsReduceMergePartial(true); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce.q ql/src/test/queries/clientpositive/vector_groupby_reduce.q index 5da2089..fb041be 100644 --- ql/src/test/queries/clientpositive/vector_groupby_reduce.q +++ ql/src/test/queries/clientpositive/vector_groupby_reduce.q @@ -89,21 +89,9 @@ ss_sold_date_sk , ss_net_profit from store_sales_txt; -explain -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20; - -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20; - +-- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... 
explain select min(ss_ticket_number) diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce2.q ql/src/test/queries/clientpositive/vector_groupby_reduce2.q new file mode 100644 index 0000000..e76a3f7 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_reduce2.q @@ -0,0 +1,105 @@ +SET hive.vectorized.execution.enabled=true; + +create table store_sales_txt +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt; + +create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +stored as orc +tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384"); + +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table store_sales +select +ss_sold_date_sk , + ss_sold_time_sk , + ss_item_sk , + ss_customer_sk , + ss_cdemo_sk , + ss_hdemo_sk , + ss_addr_sk , + ss_store_sk , + ss_promo_sk , + ss_ticket_number , + ss_quantity , + ss_wholesale_cost , + ss_list_price , + ss_sales_price , + ss_ext_discount_amt , + ss_ext_sales_price , + ss_ext_wholesale_cost , + ss_ext_list_price , + ss_ext_tax , + ss_coupon_amt , + ss_net_paid , + ss_net_paid_inc_tax , + ss_net_profit + from store_sales_txt; + +explain +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20; + +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out index e6d34ff..95863a3 100644 --- ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/tez/vector_count_distinct.q.out @@ -1287,6 +1287,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out index 637bb3b..a658650 100644 --- ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out @@ -211,117 +211,10 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_ticket_number (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_sales -#### A masked pattern was here #### -POSTHOOK: query: select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_sales -#### A masked pattern was here #### -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -PREHOOK: query: explain +PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from @@ -333,7 +226,10 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... 
+explain select min(ss_ticket_number) from diff --git ql/src/test/results/clientpositive/tez/vector_groupby_reduce2.q.out ql/src/test/results/clientpositive/tez/vector_groupby_reduce2.q.out new file mode 100644 index 0000000..4873df5 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_groupby_reduce2.q.out @@ -0,0 +1,323 @@ +PREHOOK: query: create table store_sales_txt +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales_txt +POSTHOOK: query: create table store_sales_txt +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_sales_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_sales_txt +PREHOOK: query: create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +stored as orc +tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid 
float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +stored as orc +tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: insert overwrite table store_sales +select +ss_sold_date_sk , + ss_sold_time_sk , + ss_item_sk , + ss_customer_sk , + ss_cdemo_sk , + ss_hdemo_sk , + ss_addr_sk , + ss_store_sk , + ss_promo_sk , + ss_ticket_number , + ss_quantity , + ss_wholesale_cost , + ss_list_price , + ss_sales_price , + ss_ext_discount_amt , + ss_ext_sales_price , + ss_ext_wholesale_cost , + ss_ext_list_price , + ss_ext_tax , + ss_coupon_amt , + ss_net_paid , + ss_net_paid_inc_tax , + ss_net_profit + from store_sales_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales_txt +PREHOOK: Output: default@store_sales +POSTHOOK: query: insert overwrite table store_sales +select +ss_sold_date_sk , + ss_sold_time_sk , + ss_item_sk , + ss_customer_sk , + ss_cdemo_sk , + ss_hdemo_sk , + ss_addr_sk , + ss_store_sk , + ss_promo_sk , + ss_ticket_number , + ss_quantity , + ss_wholesale_cost , + ss_list_price , + ss_sales_price , + ss_ext_discount_amt , + ss_ext_sales_price , + ss_ext_wholesale_cost , + ss_ext_list_price , + ss_ext_tax , + ss_coupon_amt , + ss_net_paid , + ss_net_paid_inc_tax , + ss_net_profit + from store_sales_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales_txt +POSTHOOK: Output: default@store_sales +POSTHOOK: Lineage: store_sales.ss_addr_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_addr_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_cdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_cdemo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_customer_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_customer_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_hdemo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_item_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, 
type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_promo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_promo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_quantity SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sold_date_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_time_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] +PREHOOK: query: explain +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_ticket_number (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### 
+POSTHOOK: query: select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 diff --git ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out index 932b175..23798e0 100644 --- ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_distinct_gby.q.out @@ -138,6 +138,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index add8a65..b253508 100644 --- ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -2870,6 +2870,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 8 Reduce Operator Tree: Group By Operator @@ -2905,6 +2906,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 6 Vertex: Union 6 @@ -3070,6 +3072,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 8 Reduce Operator Tree: Group By Operator @@ -3105,6 +3108,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 6 Vertex: Union 6 @@ -3277,6 +3281,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 5 + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -3370,6 +3375,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 5 + Execution mode: vectorized Union 3 Vertex: Union 3 Union 9 @@ -4962,6 +4968,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Reducer 7 Reduce Operator Tree: Group By Operator @@ -4997,6 +5004,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Union 5 Vertex: Union 5 diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index fe79b9c..158013f 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -211,110 +211,10 @@ POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sa POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE 
[(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] -PREHOOK: query: explain -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_ticket_number (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@store_sales -#### A masked pattern was here #### -POSTHOOK: query: select - ss_ticket_number -from - store_sales -group by ss_ticket_number -limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@store_sales -#### A masked pattern was here #### -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -PREHOOK: query: explain +PREHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... +explain select min(ss_ticket_number) from @@ -326,7 +226,10 @@ from group by ss_ticket_number limit 20 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The Reduce task has 2 MergePartial GROUP BY operators in a row. Currently, +-- we don't issue startGroup with keys out of the 1st vectorized GROUP BY, so we can't +-- vectorize the 2nd GROUP BY... 
+explain select min(ss_ticket_number) from diff --git ql/src/test/results/clientpositive/vector_groupby_reduce2.q.out ql/src/test/results/clientpositive/vector_groupby_reduce2.q.out new file mode 100644 index 0000000..cff068d --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_reduce2.q.out @@ -0,0 +1,316 @@ +PREHOOK: query: create table store_sales_txt +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales_txt +POSTHOOK: query: create table store_sales_txt +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_sales_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_sales_txt +PREHOOK: query: create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +stored as orc +tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + 
ss_net_paid_inc_tax float, + ss_net_profit float +) +stored as orc +tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: insert overwrite table store_sales +select +ss_sold_date_sk , + ss_sold_time_sk , + ss_item_sk , + ss_customer_sk , + ss_cdemo_sk , + ss_hdemo_sk , + ss_addr_sk , + ss_store_sk , + ss_promo_sk , + ss_ticket_number , + ss_quantity , + ss_wholesale_cost , + ss_list_price , + ss_sales_price , + ss_ext_discount_amt , + ss_ext_sales_price , + ss_ext_wholesale_cost , + ss_ext_list_price , + ss_ext_tax , + ss_coupon_amt , + ss_net_paid , + ss_net_paid_inc_tax , + ss_net_profit + from store_sales_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales_txt +PREHOOK: Output: default@store_sales +POSTHOOK: query: insert overwrite table store_sales +select +ss_sold_date_sk , + ss_sold_time_sk , + ss_item_sk , + ss_customer_sk , + ss_cdemo_sk , + ss_hdemo_sk , + ss_addr_sk , + ss_store_sk , + ss_promo_sk , + ss_ticket_number , + ss_quantity , + ss_wholesale_cost , + ss_list_price , + ss_sales_price , + ss_ext_discount_amt , + ss_ext_sales_price , + ss_ext_wholesale_cost , + ss_ext_list_price , + ss_ext_tax , + ss_coupon_amt , + ss_net_paid , + ss_net_paid_inc_tax , + ss_net_profit + from store_sales_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales_txt +POSTHOOK: Output: default@store_sales +POSTHOOK: Lineage: store_sales.ss_addr_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_addr_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_cdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_cdemo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_customer_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_customer_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_hdemo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_item_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_item_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, 
type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_promo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_promo_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_quantity SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sold_date_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_time_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] +PREHOOK: query: explain +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_ticket_number (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select + ss_ticket_number +from + store_sales +group by ss_ticket_number +limit 20 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@store_sales +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20
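For reviewers who want the post-patch dispatch in one place: the sketch below condenses the three-way processing-mode selection that VectorGroupByOperator ends up with (no keys -> global aggregate; reduce-side merge-partial -> the renamed ProcessingModeReduceMergePartialKeys; otherwise hash, with a possible later switch to streaming). It is a hedged, self-contained illustration compiled from the hunks above, not Hive source; the class, enum, and method names here are invented for the example, and only the decision logic mirrors the diff.

// Hypothetical, self-contained sketch of the mode dispatch; compiles and runs on its own.
public class GroupByModeDispatchSketch {
  enum Mode { GLOBAL_AGGREGATE, REDUCE_MERGE_PARTIAL_KEYS, HASH_AGGREGATE }

  // Mirrors the constructor dispatch in VectorGroupByOperator after this patch.
  static Mode choose(int outputKeyLength, boolean isReduceMergePartial) {
    if (outputKeyLength == 0) {
      // Hash and MergePartial global aggregation are both handled here.
      return Mode.GLOBAL_AGGREGATE;
    } else if (isReduceMergePartial) {
      // Sorted reduce-side input: each incoming batch shares a single group key.
      return Mode.REDUCE_MERGE_PARTIAL_KEYS;
    } else {
      // Map-side (and reduce-side hash): start hashing, may switch to streaming later.
      return Mode.HASH_AGGREGATE;
    }
  }

  public static void main(String[] args) {
    System.out.println(choose(0, false)); // GLOBAL_AGGREGATE
    System.out.println(choose(2, true));  // REDUCE_MERGE_PARTIAL_KEYS
    System.out.println(choose(2, false)); // HASH_AGGREGATE
  }
}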