diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 5d07fba..0bedab7 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -174,9 +174,11 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_where_non_partitioned.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
+  vector_aggregate_9.q,\
   vector_between_in.q,\
   vector_bucket.q,\
   vector_cast_constant.q,\
+  vector_char_2.q,\
   vector_char_4.q,\
   vector_char_simple.q,\
   vector_coalesce.q,\
@@ -196,6 +198,8 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_decimal_mapjoin.q,\
   vector_decimal_math_funcs.q,\
   vector_decimal_precision.q,\
+  vector_decimal_round.q,\
+  vector_decimal_round_2.q,\
   vector_decimal_trailing.q,\
   vector_decimal_udf.q,\
   vector_decimal_udf2.q,\
@@ -205,6 +209,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_reduce.q,\
   vector_left_outer_join.q,\
   vector_mapjoin_reduce.q,\
+  vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
   vector_partition_diff_num_cols.q,\
@@ -245,6 +250,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vectorized_context.q,\
   vectorized_date_funcs.q,\
   vectorized_distinct_gby.q,\
+  vectorized_dynamic_partition_pruning.q,\
   vectorized_mapjoin.q,\
   vectorized_math_funcs.q,\
   vectorized_nested_mapjoin.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 90b4b12..87e4db7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -747,7 +747,7 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf)
     aggregators = new VectorAggregateExpression[aggrDesc.size()];
     for (int i = 0; i < aggrDesc.size(); ++i) {
       AggregationDesc aggDesc = aggrDesc.get(i);
-      aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduce());
+      aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isHash());
     }
 
     isVectorOutput = desc.getVectorDesc().isVectorOutput();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5201c57..3e1e994 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2012,7 +2012,7 @@ static String getUndecoratedName(String hiveTypeName) {
     add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
   }};
 
-  public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce)
+  public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isHash)
       throws HiveException {
 
     ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
@@ -2040,12 +2040,14 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
           inputType == VectorExpressionDescriptor.ArgumentType.NONE) ||
           (aggDef.getType().isSameTypeOrFamily(inputType)))) {
 
-        if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduce) {
-          continue;
-        } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduce) {
-          continue;
-        }
-
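+        // The table registers some aggregates once per mode: HASH entries only
+        // apply to hash aggregation and MERGEPARTIAL entries only to the merge
+        // side, so skip the definitions whose mode does not match.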
+        if (aggDef.getMode() == GroupByDesc.Mode.HASH && !isHash) {
+          continue;
+        } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && isHash) {
+          continue;
+        }
 
         Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
         try {
@@ -2063,7 +2065,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
     }
 
     throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
-        "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")");
+        "\" for type: \"" + inputType.name() + " (isHash = " + isHash + ")");
   }
 
   public Map<Integer, String> getScratchColumnTypeMap() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index c8e6ef5..8261b49 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1068,29 +1068,56 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
     if (!ret) {
       return false;
     }
-    ret = validateAggregationDesc(desc.getAggregators(), isReduce);
+    boolean isHash = false;
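+    // Of the modes below, only HASH aggregates through an in-memory hash table;
+    // the other modes we can vectorize do sort-based aggregation in a reducer.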
+    switch (desc.getMode()) {
+    case HASH:
+      isHash = true;
+      break;
+
+    case COMPLETE:
+    case PARTIAL1:
+    case PARTIAL2:
+    case MERGEPARTIAL:
+      isHash = false;
+      break;
+
+    default:
+      LOG.info("Unexpected GROUP BY mode " + desc.getMode().name());
+      return false;
+    }
+    LOG.info("GROUP BY mode is " + desc.getMode().name());
+    vectorDesc.setIsHash(isHash);
+    ret = validateAggregationDesc(desc.getAggregators(), isHash);
     if (!ret) {
       return false;
     }
-    if (isReduce) {
-      if (desc.isDistinct()) {
-        LOG.info("Distinct not supported in reduce vector mode");
+
+    // Hash GROUP BY will appear in MapWork, and in the middle of ReduceWork
+    // when there is a downstream Reducer that will do the GROUP BY MergePartial.
+    //
+    // MergePartial only appears in ReduceWork.
+    //
+    if (!isHash) {
+      if (!isReduce) {
+        LOG.info("Only expecting GROUP BY MergePartial in ReduceWork");
         return false;
       }
-      // Sort-based GroupBy?
-      if (desc.getMode() != GroupByDesc.Mode.COMPLETE &&
-          desc.getMode() != GroupByDesc.Mode.PARTIAL1 &&
-          desc.getMode() != GroupByDesc.Mode.PARTIAL2 &&
-          desc.getMode() != GroupByDesc.Mode.MERGEPARTIAL) {
-        LOG.info("Reduce vector mode not supported when input for GROUP BY not sorted");
+      // This kind of Group By in ReduceWork needs to be the top operator since
+      // it is being fed by reduce-shuffle.
+      //
+      // And, there are other current restrictions.
+      //
+      if (desc.isDistinct()) {
+        LOG.info("Distinct not supported in reduce vector mode");
         return false;
       }
-      LOG.info("Reduce GROUP BY mode is " + desc.getMode().name());
       if (desc.getGroupKeyNotReductionKey()) {
         LOG.info("Reduce vector mode not supported when group key is not reduction key");
         return false;
       }
-      if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), isReduce)) {
+      if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), /* isHash */ false)) {
         LOG.info("Reduce vector mode only supported when aggregate outputs are primitive types");
         return false;
       }
@@ -1105,7 +1132,6 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
         LOG.info("Reduce-side GROUP BY will do global aggregation");
       }
       vectorDesc.setVectorOutput(true);
-      vectorDesc.setIsReduce(true);
     }
     return true;
   }
@@ -1138,9 +1164,9 @@ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
     return true;
   }
 
-  private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduce) {
+  private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isHash) {
     for (AggregationDesc d : descs) {
-      boolean ret = validateAggregationDesc(d, isReduce);
+      boolean ret = validateAggregationDesc(d, isHash);
       if (!ret) {
         return false;
       }
@@ -1216,7 +1242,7 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
     }
   }
 
-  private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
+  private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isHash) {
     if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
       return false;
     }
@@ -1226,7 +1252,7 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc
     // See if we can vectorize the aggregation.
     try {
       VectorizationContext vc = new ValidatorVectorizationContext();
-      if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
+      if (vc.getAggregatorExpression(aggDesc, isHash) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
         LOG.info("getAggregatorExpression returned null");
         return false;
@@ -1238,9 +1264,9 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc
     return true;
   }
 
-  private boolean aggregatorsOutputIsPrimitive(List<AggregationDesc> descs, boolean isReduce) {
+  private boolean aggregatorsOutputIsPrimitive(List<AggregationDesc> descs, boolean isHash) {
     for (AggregationDesc d : descs) {
-      boolean ret = aggregatorsOutputIsPrimitive(d, isReduce);
+      boolean ret = aggregatorsOutputIsPrimitive(d, isHash);
       if (!ret) {
         return false;
       }
@@ -1248,11 +1274,11 @@ private boolean aggregatorsOutputIsPrimitive(List<AggregationDesc> descs, boolea
     return true;
   }
 
-  private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduce) {
+  private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isHash) {
     VectorizationContext vc = new ValidatorVectorizationContext();
     VectorAggregateExpression vectorAggrExpr;
     try {
-      vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduce);
+      vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isHash);
     } catch (Exception e) {
       // We should have already attempted to vectorize in validateAggregationDesc.
LOG.info("Vectorization of aggreation should have succeeded ", e); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index b92c38b..f9008a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -30,22 +30,23 @@ private static long serialVersionUID = 1L; - private boolean isReduce; + private boolean isHash; private boolean isVectorGroupBatches; private boolean isVectorOutput; public VectorGroupByDesc() { - this.isReduce = false; + // Assume the normal case -- Hash GROUP BY. + this.isHash = true; this.isVectorGroupBatches = false; this.isVectorOutput = false; } - public boolean isReduce() { - return isReduce; + public boolean isHash() { + return isHash; } - public void setIsReduce(boolean isReduce) { - this.isReduce = isReduce; + public void setIsHash(boolean isHash) { + this.isHash = isHash; } public boolean isVectorGroupBatches() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index fbb7ff2..895147e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -2267,7 +2267,7 @@ public void testAggregateCountReduceIterable ( GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - vectorDesc.setIsReduce(true); + vectorDesc.setIsHash(false); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); diff --git ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out index 9645b13..979990d 100644 --- ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out @@ -170,4 +170,4 @@ POSTHOOK: query: select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### --4997414117561.546875 4994550248722.298828 -10252745435816.024410 -5399023399.587163986308583465 +-4997414117561.546875 4994550248722.298828 -10252745435816.02441 -5399023399.587163986308583465 diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 59f872a..4a2b795 100644 --- ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -77,12 +77,12 @@ STAGE PLANS: alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -101,15 +101,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 
                 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(20))
+                  sort order: +
                   Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: char(20))
-                    sort order: +
-                    Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -209,12 +205,12 @@ STAGE PLANS:
                   alias: char_2
                   Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: value (type: char(20)), key (type: char(10))
-                    outputColumnNames: value, key
+                    expressions: value (type: char(20)), UDFToInteger(key) (type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(UDFToInteger(key)), count()
-                      keys: value (type: char(20))
+                      aggregations: sum(_col1), count()
+                      keys: _col0 (type: char(20))
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
@@ -233,15 +229,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(20))
+                  sort order: -
                   Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: char(20))
-                    sort order: -
-                    Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out
index 0d00c2d..651a5fb 100644
--- ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out
+++ ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out
@@ -121,7 +121,7 @@ FROM decimal_tbl_1_orc ORDER BY d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_1_orc
 #### A masked pattern was here ####
-55555	55555	55555.0	55555.00	55555.000	55560	55600	56000	60000	100000	0	0	0
+55555	55555	55555	55555	55555	55560	55600	56000	60000	100000	0	0	0
 PREHOOK: query: create table decimal_tbl_2_orc (pos decimal(38,18), neg decimal(38,18))
 STORED AS ORC
 PREHOOK: type: CREATETABLE
@@ -240,7 +240,7 @@ FROM decimal_tbl_2_orc ORDER BY p
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_2_orc
 #### A masked pattern was here ####
-125	125	125.3	125.32	125.315	125.3150	130	100	0	0	-125	-125	-125.3	-125.32	-125.315	-125.3150	-130	-100	0	0
+125	125	125.3	125.32	125.315	125.315	130	100	0	0	-125	-125	-125.3	-125.32	-125.315	-125.315	-130	-100	0	0
 PREHOOK: query: create table decimal_tbl_3_orc (dec decimal(38,18))
 STORED AS ORC
 PREHOOK: type: CREATETABLE
@@ -402,7 +402,7 @@ FROM decimal_tbl_3_orc ORDER BY d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_tbl_3_orc
 #### A masked pattern was here ####
-0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	3	3.1	3.14	3.142	3.1416	3.14159	3.141593	3.1415927	3.14159265	3.141592654	3.1415926536	3.14159265359	3.141592653590	3.1415926535898	3.1415926535898	3.14159265358979	3.141592653589793	3.1415926535897930
+0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	3	3.1	3.14	3.142	3.1416	3.14159	3.141593	3.1415927	3.14159265	3.141592654	3.1415926536	3.14159265359	3.14159265359	3.1415926535898	3.1415926535898	3.14159265358979	3.141592653589793	3.141592653589793
 PREHOOK: query: create table decimal_tbl_4_orc (pos decimal(38,18), neg decimal(38,18))
 STORED AS ORC
 PREHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out
new file mode 100644
index 0000000..d382399
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out
@@ -0,0 +1,225 @@
+PREHOOK: query: create table orc1
+  stored as orc
+  tblproperties("orc.compress"="ZLIB")
+  as
+    select rn
+    from
+    (
+      select cast(1 as int) as rn from src limit 1
+      union all
+      select cast(100 as int) as rn from src limit 1
+      union all
+      select cast(10000 as int) as rn from src limit 1
+    ) t
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc1
+POSTHOOK: query: create table orc1
+  stored as orc
+  tblproperties("orc.compress"="ZLIB")
+  as
+    select rn
+    from
+    (
+      select cast(1 as int) as rn from src limit 1
+      union all
+      select cast(100 as int) as rn from src limit 1
+      union all
+      select cast(10000 as int) as rn from src limit 1
+    ) t
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc1
+PREHOOK: query: create table orc_rn1 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn1
+POSTHOOK: query: create table orc_rn1 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn1
+PREHOOK: query: create table orc_rn2 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn2
+POSTHOOK: query: create table orc_rn2 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn2
+PREHOOK: query: create table orc_rn3 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: create table orc_rn3 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn3
+PREHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-4
+  Stage-6 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-3
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (rn < 100) (type: boolean)
+                    Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn1
+                  Filter Operator
+                    predicate: ((rn >= 100) and (rn < 1000)) (type: boolean)
+                    Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn2
+                  Filter Operator
+                    predicate: (rn >= 1000) (type: boolean)
+                    Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn3
+            Execution mode: vectorized
+
+  Stage: Stage-4
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn1
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn2
+
+  Stage: Stage-6
+    Stats-Aggr Operator
+
+  Stage: Stage-2
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn3
+
+  Stage: Stage-7
+    Stats-Aggr Operator
+
+PREHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc1
+PREHOOK: Output: default@orc_rn1
+PREHOOK: Output: default@orc_rn2
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc1
+POSTHOOK: Output: default@orc_rn1
+POSTHOOK: Output: default@orc_rn2
+POSTHOOK: Output: default@orc_rn3
+POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+PREHOOK: query: select * from orc_rn1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from orc_rn2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+100
+PREHOOK: query: select * from orc_rn3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+10000
diff --git ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
index 8487e6f..39d276d 100644
--- ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
@@ -1963,6 +1963,7 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
 
     Stage: Stage-0
       Fetch Operator
@@ -1986,7 +1987,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-1000
+0
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -2958,6 +2959,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Reducer 8 
             Reduce Operator Tree:
               Group By Operator
@@ -2986,6 +2988,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Union 6 
             Vertex: Union 6
 
@@ -3011,7 +3014,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-2000
+0
 PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
@@ -3148,6 +3151,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Reducer 8 
             Reduce Operator Tree:
               Group By Operator
@@ -3176,6 +3180,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Union 6 
             Vertex: Union 6
 
@@ -3201,8 +3206,6 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-2008-04-08
-2008-04-09
 PREHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
@@ -3338,6 +3341,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 5
+            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -3421,6 +3425,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 5
+            Execution mode: vectorized
         Union 3 
             Vertex: Union 3
         Union 9 
@@ -3448,10 +3453,6 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-2008-04-08
-2008-04-08
-2008-04-09
-2008-04-09
 PREHOOK: query: -- single column, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
 PREHOOK: type: QUERY
@@ -4361,6 +4362,7 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
 
     Stage: Stage-0
       Fetch Operator
@@ -4384,7 +4386,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-1000
+0
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -5039,6 +5041,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Reducer 7 
             Reduce Operator Tree:
               Group By Operator
@@ -5067,6 +5070,7 @@ STAGE PLANS:
                         Partition key expr: ds
                         Target column: ds
                         Target Vertex: Map 1
+            Execution mode: vectorized
         Union 5 
             Vertex: Union 5
 
@@ -5092,8 +5096,6 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-2008-04-08
-2008-04-09
 PREHOOK: query: -- different file format
 create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc
 PREHOOK: type: CREATETABLE