diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 917b3a4..2a66898 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -174,9 +174,11 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_where_non_partitioned.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
+  vector_aggregate_9.q,\
   vector_between_in.q,\
   vector_bucket.q,\
   vector_cast_constant.q,\
+  vector_char_2.q,\
   vector_char_4.q,\
   vector_char_mapjoin1.q,\
   vector_char_simple.q,\
@@ -197,6 +199,8 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_decimal_mapjoin.q,\
   vector_decimal_math_funcs.q,\
   vector_decimal_precision.q,\
+  vector_decimal_round.q,\
+  vector_decimal_round_2.q,\
   vector_decimal_trailing.q,\
   vector_decimal_udf.q,\
   vector_decimal_udf2.q,\
@@ -206,6 +210,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_reduce.q,\
   vector_left_outer_join.q,\
   vector_mapjoin_reduce.q,\
+  vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
   vector_partition_diff_num_cols.q,\
@@ -247,6 +252,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vectorized_context.q,\
   vectorized_date_funcs.q,\
   vectorized_distinct_gby.q,\
+  vectorized_dynamic_partition_pruning.q,\
   vectorized_mapjoin.q,\
   vectorized_math_funcs.q,\
   vectorized_nested_mapjoin.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 90b4b12..0ae3cf3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -747,7 +747,7 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf)
     aggregators = new VectorAggregateExpression[aggrDesc.size()];
     for (int i = 0; i < aggrDesc.size(); ++i) {
       AggregationDesc aggDesc = aggrDesc.get(i);
-      aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduce());
+      aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduceMergePartial());
     }

     isVectorOutput = desc.getVectorDesc().isVectorOutput();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5201c57..5db6b95 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2012,7 +2012,7 @@ static String getUndecoratedName(String hiveTypeName) {
     add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
   }};

-  public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce)
+  public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduceMergePartial)
       throws HiveException {

    ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
@@ -2040,11 +2040,11 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
           inputType == VectorExpressionDescriptor.ArgumentType.NONE) ||
           (aggDef.getType().isSameTypeOrFamily(inputType)))) {

-        if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduce) {
-          continue;
-        } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduce) {
-          continue;
-        }
+        if (aggDef.getMode() == GroupByDesc.Mode.HASH && isReduceMergePartial) {
+          continue;
+        } else if (aggDef.getMode() == GroupByDesc.Mode.MERGEPARTIAL && !isReduceMergePartial) {
+          continue;
+        }

        Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
        try
@@ -2063,7 +2063,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b
     }

     throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
-        "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")");
+        "\" for type: \"" + inputType.name() + " (reduce-merge-partial = " + isReduceMergePartial + ")");
   }

   public Map<Integer, String> getScratchColumnTypeMap() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 94b4621..73d0ed0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1081,44 +1081,77 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
     if (!ret) {
       return false;
     }
-    ret = validateAggregationDesc(desc.getAggregators(), isReduce);
-    if (!ret) {
+    boolean isMergePartial = false;
+    switch (desc.getMode()) {
+    case HASH:
+      isMergePartial = false;
+      break;
+
+    case COMPLETE:
+    case PARTIAL1:
+    case PARTIAL2:
+    case MERGEPARTIAL:
+      isMergePartial = true;
+      break;
+
+    default:
+      LOG.info("Unexpected GROUP BY mode " + desc.getMode().name());
       return false;
     }
-    if (isReduce) {
-      if (desc.isDistinct()) {
-        LOG.info("Distinct not supported in reduce vector mode");
-        return false;
-      }
-      // Sort-based GroupBy?
-      if (desc.getMode() != GroupByDesc.Mode.COMPLETE &&
-          desc.getMode() != GroupByDesc.Mode.PARTIAL1 &&
-          desc.getMode() != GroupByDesc.Mode.PARTIAL2 &&
-          desc.getMode() != GroupByDesc.Mode.MERGEPARTIAL) {
-        LOG.info("Reduce vector mode not supported when input for GROUP BY not sorted");
+    LOG.info("GROUP BY mode is " + desc.getMode().name());
+
+    if (!isReduce) {
+
+      // MapWork
+      //
+      if (isMergePartial) {
+        LOG.info("Can only vectorize MergePartial GROUP BY in ReduceWork");
         return false;
       }
-      LOG.info("Reduce GROUP BY mode is " + desc.getMode().name());
-      if (desc.getGroupKeyNotReductionKey()) {
-        LOG.info("Reduce vector mode not supported when group key is not reduction key");
+      ret = validateAggregationDesc(desc.getAggregators(), /* isReduceMergePartial */ false);
+      if (!ret) {
         return false;
       }
-      if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), isReduce)) {
-        LOG.info("Reduce vector mode only supported when aggregate outputs are primitive types");
+    } else {
+
+      // ReduceWork
+      //
+      ret = validateAggregationDesc(desc.getAggregators(), isMergePartial);
+      if (!ret) {
         return false;
       }
-      if (desc.getKeys().size() > 0) {
-        if (op.getParentOperators().size() > 0) {
-          LOG.info("Reduce vector mode can only handle a key group GROUP BY operator when it is fed by reduce-shuffle");
+      if (isMergePartial) {
+
+        // The VectorGroupByOperator currently only supports MergePartial when it is
+        // fed key-grouped rows by reduce-shuffle.
+        //
+        // There are more restrictions on the Reduce MergePartial VectorGroupByOperator, too.
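+        // (The checks below reject DISTINCT aggregates and group keys that are not
+        // the reduction key, and require every aggregate output to be a primitive type.)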
+        //
+        if (desc.isDistinct()) {
+          LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
           return false;
         }
-        LOG.info("Reduce-side GROUP BY will process key groups");
-        vectorDesc.setVectorGroupBatches(true);
-      } else {
-        LOG.info("Reduce-side GROUP BY will do global aggregation");
+        if (desc.getGroupKeyNotReductionKey()) {
+          LOG.info("Vectorized Reduce MergePartial GROUP BY is not supported unless the group key is the reduction key");
+          return false;
+        }
+        if (!aggregatorsOutputIsPrimitive(desc.getAggregators(), isMergePartial)) {
+          LOG.info("Vectorized Reduce MergePartial GROUP BY can only handle aggregate outputs that are primitive types");
+          return false;
+        }
+        if (desc.getKeys().size() > 0) {
+          if (op.getParentOperators().size() > 0) {
+            LOG.info("Vectorized Reduce MergePartial GROUP BY can only handle a key group when it is fed by reduce-shuffle");
+            return false;
+          }
+          LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups");
+          vectorDesc.setVectorGroupBatches(true);
+        } else {
+          LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
+        }
+        vectorDesc.setVectorOutput(true);
       }
-      vectorDesc.setVectorOutput(true);
-      vectorDesc.setIsReduce(true);
+      vectorDesc.setIsReduceMergePartial(isMergePartial);
     }
     return true;
   }
@@ -1151,9 +1184,9 @@ private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
     return true;
   }

-  private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduce) {
+  private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduceMergePartial) {
     for (AggregationDesc d : descs) {
-      boolean ret = validateAggregationDesc(d, isReduce);
+      boolean ret = validateAggregationDesc(d, isReduceMergePartial);
       if (!ret) {
         return false;
       }
@@ -1229,7 +1262,7 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
     }
   }

-  private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
+  private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial) {
     if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
       return false;
     }
@@ -1239,7 +1272,7 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc
     // See if we can vectorize the aggregation.
     try {
       VectorizationContext vc = new ValidatorVectorizationContext();
-      if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
+      if (vc.getAggregatorExpression(aggDesc, isReduceMergePartial) == null) {
        // TODO: this cannot happen - VectorizationContext throws in such cases.
LOG.info("getAggregatorExpression returned null"); return false; @@ -1251,9 +1284,9 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc return true; } - private boolean aggregatorsOutputIsPrimitive(List descs, boolean isReduce) { + private boolean aggregatorsOutputIsPrimitive(List descs, boolean isReduceMergePartial) { for (AggregationDesc d : descs) { - boolean ret = aggregatorsOutputIsPrimitive(d, isReduce); + boolean ret = aggregatorsOutputIsPrimitive(d, isReduceMergePartial); if (!ret) { return false; } @@ -1261,11 +1294,11 @@ private boolean aggregatorsOutputIsPrimitive(List descs, boolea return true; } - private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduce) { + private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduceMergePartial) { VectorizationContext vc = new ValidatorVectorizationContext(); VectorAggregateExpression vectorAggrExpr; try { - vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduce); + vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial); } catch (Exception e) { // We should have already attempted to vectorize in validateAggregationDesc. LOG.info("Vectorization of aggreation should have succeeded ", e); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 7a0b0da..fcb8762 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -73,6 +73,7 @@ private VectorGroupByDesc vectorDesc; public GroupByDesc() { + this.mode = Mode.HASH; // Assume. vectorDesc = new VectorGroupByDesc(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index b92c38b..07d2510 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -30,22 +30,22 @@ private static long serialVersionUID = 1L; - private boolean isReduce; + private boolean isReduceMergePartial; private boolean isVectorGroupBatches; private boolean isVectorOutput; public VectorGroupByDesc() { - this.isReduce = false; + this.isReduceMergePartial = false; this.isVectorGroupBatches = false; this.isVectorOutput = false; } - public boolean isReduce() { - return isReduce; + public boolean isReduceMergePartial() { + return isReduceMergePartial; } - public void setIsReduce(boolean isReduce) { - this.isReduce = isReduce; + public void setIsReduceMergePartial(boolean isReduceMergePartial) { + this.isReduceMergePartial = isReduceMergePartial; } public boolean isVectorGroupBatches() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index fbb7ff2..15fb876 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -2267,7 +2267,7 @@ public void testAggregateCountReduceIterable ( GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - vectorDesc.setIsReduce(true); + vectorDesc.setIsReduceMergePartial(true); VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); diff --git ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out 
ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out index 9645b13..979990d 100644 --- ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out @@ -170,4 +170,4 @@ POSTHOOK: query: select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### --4997414117561.546875 4994550248722.298828 -10252745435816.024410 -5399023399.587163986308583465 +-4997414117561.546875 4994550248722.298828 -10252745435816.02441 -5399023399.587163986308583465 diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 59f872a..4a2b795 100644 --- ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -77,12 +77,12 @@ STAGE PLANS: alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -101,15 +101,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: + - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -209,12 +205,12 @@ STAGE PLANS: alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -233,15 +229,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key 
expressions: _col0 (type: char(20)) + sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: - - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out index 0d00c2d..651a5fb 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out @@ -121,7 +121,7 @@ FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_1_orc #### A masked pattern was here #### -55555 55555 55555.0 55555.00 55555.000 55560 55600 56000 60000 100000 0 0 0 +55555 55555 55555 55555 55555 55560 55600 56000 60000 100000 0 0 0 PREHOOK: query: create table decimal_tbl_2_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -240,7 +240,7 @@ FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### -125 125 125.3 125.32 125.315 125.3150 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.3150 -130 -100 0 0 +125 125 125.3 125.32 125.315 125.315 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.315 -130 -100 0 0 PREHOOK: query: create table decimal_tbl_3_orc (dec decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -402,7 +402,7 @@ FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.141592653590 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.1415926535897930 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.14159265359 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.141592653589793 PREHOOK: query: create table decimal_tbl_4_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out new file mode 100644 index 0000000..50399d3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select cast(1 as int) as rn from src limit 1 + union all + select cast(100 as int) as rn from src limit 1 + union all + select cast(10000 as int) as rn from src limit 1 + ) t +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@orc1 +POSTHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select cast(1 as int) as rn from src limit 1 + union all + select cast(100 as int) as rn from src limit 1 + union all + select cast(10000 as int) as rn from src limit 1 + ) t +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src 
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc1
+PREHOOK: query: create table orc_rn1 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn1
+POSTHOOK: query: create table orc_rn1 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn1
+PREHOOK: query: create table orc_rn2 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn2
+POSTHOOK: query: create table orc_rn2 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn2
+PREHOOK: query: create table orc_rn3 (rn int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: create table orc_rn3 (rn int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_rn3
+PREHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-4
+  Stage-6 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-3
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 145 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (rn < 100) (type: boolean)
+                    Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn1
+                  Filter Operator
+                    predicate: ((rn >= 100) and (rn < 1000)) (type: boolean)
+                    Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn2
+                  Filter Operator
+                    predicate: (rn >= 1000) (type: boolean)
+                    Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: rn (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.orc_rn3
+            Execution mode: vectorized
+
+  Stage: Stage-4
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn1
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn2
+
+  Stage: Stage-6
+    Stats-Aggr Operator
+
+  Stage: Stage-2
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.orc_rn3
+
+  Stage: Stage-7
+    Stats-Aggr Operator
+
+PREHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc1
+PREHOOK: Output: default@orc_rn1
+PREHOOK: Output: default@orc_rn2
+PREHOOK: Output: default@orc_rn3
+POSTHOOK: query: from orc1 a
+insert overwrite table orc_rn1 select a.* where a.rn < 100
+insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
+insert overwrite table orc_rn3 select a.* where a.rn >= 1000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc1
+POSTHOOK: Output: default@orc_rn1
+POSTHOOK: Output: default@orc_rn2
+POSTHOOK: Output: default@orc_rn3
+POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ]
+PREHOOK: query: select * from orc_rn1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn1
+#### A masked pattern was here ####
+1
+PREHOOK: query: select * from orc_rn2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn2
+#### A masked pattern was here ####
+100
+PREHOOK: query: select * from orc_rn3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_rn3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_rn3
+#### A masked pattern was here ####
+10000
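
A note for readers tracing the Vectorizer change above: the patch derives a single boolean, isReduceMergePartial, from GroupByDesc.Mode and threads it through validation, aggregator selection, and VectorGroupByDesc. The standalone Java sketch below restates just that classification rule; its Mode enum is a local stand-in for org.apache.hadoop.hive.ql.plan.GroupByDesc.Mode, and the class itself (GroupByModeSketch) is illustrative only, not part of the patch.

/**
 * Minimal sketch of the GROUP BY mode classification performed by
 * Vectorizer.validateGroupByOperator in the patch above. Mode is a
 * local stand-in for GroupByDesc.Mode; the class is hypothetical.
 */
public class GroupByModeSketch {

  enum Mode { COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL }

  /**
   * Maps a GROUP BY mode to the new isReduceMergePartial flag:
   * HASH means map-side hash aggregation (flag false); the sorted
   * modes merge partial aggregations on the reduce side (flag true);
   * anything else is rejected for vectorization (the patch logs and
   * returns false where this sketch throws).
   */
  static boolean classifyAsReduceMergePartial(Mode mode) {
    switch (mode) {
    case HASH:
      return false;
    case COMPLETE:
    case PARTIAL1:
    case PARTIAL2:
    case MERGEPARTIAL:
      return true;
    default:
      throw new IllegalArgumentException("Unexpected GROUP BY mode " + mode);
    }
  }

  public static void main(String[] args) {
    System.out.println(classifyAsReduceMergePartial(Mode.HASH));          // false (MapWork)
    System.out.println(classifyAsReduceMergePartial(Mode.MERGEPARTIAL));  // true (ReduceWork)
  }
}

The same flag drives aggregator selection in VectorizationContext.getAggregatorExpression above: aggregate definitions registered for Mode.HASH are skipped when the flag is true, and Mode.MERGEPARTIAL definitions are skipped when it is false.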