diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
index 174685b..5b673df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE;
 
 import java.util.HashSet;
 import java.util.Set;
@@ -39,11 +40,13 @@
   // only one reducer if this configuration does not prevents
   private final int minReducer;
   private final Set<Operator<?>> removedOps;
+  private final boolean isMapAggr;
 
   public AbstractCorrelationProcCtx(ParseContext pctx) {
     removedOps = new HashSet<Operator<?>>();
     trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
     minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
+    isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
     this.pctx = pctx;
   }
@@ -70,4 +73,8 @@ public boolean hasBeenRemoved(Operator rsOp) {
   public boolean addRemovedOperator(Operator rsOp) {
     return removedOps.add(rsOp);
   }
+
+  public boolean isMapAggr() {
+    return isMapAggr;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
index 64bef21..7bb49be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
@@ -29,6 +29,7 @@
 import java.util.Map.Entry;
 import java.util.Set;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
@@ -44,6 +45,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication.ReduceSinkDeduplicateProcCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -163,10 +165,10 @@ protected static boolean hasGroupingSet(ReduceSinkOperator cRS) throws SemanticE
     return type.isInstance(parent) ? (T)parent : null;
   }
 
-  protected static Operator getStartForGroupBy(ReduceSinkOperator cRS)
+  protected static Operator getStartForGroupBy(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
       throws SemanticException {
     Operator parent = getSingleParent(cRS);
-    return parent instanceof GroupByOperator ? parent : cRS; // skip map-aggr GBY
+    return parent instanceof GroupByOperator && dedupCtx.isMapAggr() ? parent : cRS; // skip map-aggr GBY
   }
@@ -240,6 +242,7 @@ protected static int indexOf(ExprNodeDesc cexpr, ExprNodeDesc[] pexprs, Operator
         || cursor instanceof FilterOperator
         || cursor instanceof ForwardOperator
         || cursor instanceof ScriptOperator
+        || cursor instanceof GroupByOperator
         || cursor instanceof ReduceSinkOperator)) {
       return null;
     }
@@ -395,7 +398,7 @@ protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupBy
 
     Operator parent = getSingleParent(cRS);
 
-    if (parent instanceof GroupByOperator) {
+    if ((parent instanceof GroupByOperator) && procCtx.isMapAggr()) {
       // pRS-cGBYm-cRS-cGBYr (map aggregation) --> pRS-cGBYr(COMPLETE)
       // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
       GroupByOperator cGBYm = (GroupByOperator) parent;
@@ -440,7 +443,7 @@ protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupBy
     removeOperator(cRS, cGBYr, parent, context);
     procCtx.addRemovedOperator(cRS);
 
-    if (parent instanceof GroupByOperator) {
+    if ((parent instanceof GroupByOperator) && procCtx.isMapAggr()) {
       removeOperator(parent, cGBYr, getSingleParent(parent), context);
       procCtx.addRemovedOperator(cGBYr);
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 7b5f9b2..56334ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -500,7 +500,7 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
     public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
         ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
-      Operator start = CorrelationUtilities.getStartForGroupBy(cRS);
+      Operator start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
       GroupByOperator pGBY = CorrelationUtilities.findPossibleParent(
           start, GroupByOperator.class, dedupCtx.trustScript());
@@ -547,7 +547,7 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
     public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
         ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
-      Operator start = CorrelationUtilities.getStartForGroupBy(cRS);
+      Operator start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
       JoinOperator pJoin = CorrelationUtilities.findPossibleParent(
           start, JoinOperator.class, dedupCtx.trustScript());
@@ -590,7 +590,7 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
     public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
         ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
-      Operator start = CorrelationUtilities.getStartForGroupBy(cRS);
+      Operator start = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
       ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent(
           start, ReduceSinkOperator.class, dedupCtx.trustScript());
diff --git a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
index 8f9dd12..4d37eca 100644
--- a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
@@ -2821,11 +2821,10 @@ from src
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-
Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3, Stage-5 - Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-2 @@ -2840,7 +2839,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -2855,10 +2854,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2875,29 +2875,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -2906,7 +2883,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2957,11 +2934,10 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3, Stage-5 - Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-2 @@ -2976,7 +2952,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: 
COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -2991,10 +2967,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3011,29 +2988,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -3042,7 +2996,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -3093,11 +3047,10 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3, Stage-5 - Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-2 @@ -3112,7 +3065,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -3127,10 +3080,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3147,29 +3101,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -3178,7 +3109,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -3229,11 +3160,10 @@ from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3, Stage-5 - Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-2 @@ -3248,7 +3178,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -3263,10 +3193,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3283,29 +3214,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -3314,7 +3222,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -3369,15 +3277,14 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8, Stage-9 + Stage-6 depends on stages: Stage-4, Stage-8 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-9 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-4 @@ -3422,7 +3329,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -3437,10 +3344,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3485,36 +3393,13 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 Move Operator files: 
hdfs directory: false #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -3616,15 +3501,14 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8, Stage-9 + Stage-6 depends on stages: Stage-4, Stage-8 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 Stage-8 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-8 - Stage-9 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-4 @@ -3669,7 +3553,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -3684,10 +3568,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3732,36 +3617,13 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -3865,7 +3727,7 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-6 
@@ -3878,10 +3740,9 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 + Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-18 - Stage-19 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-18 STAGE PLANS: Stage: Stage-4 @@ -3926,7 +3787,7 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -3941,10 +3802,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -4067,36 +3929,13 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-19 + Stage: Stage-18 Map Reduce Map Operator Tree: TableScan @@ -4200,7 +4039,7 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-6 @@ -4213,10 +4052,9 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 + Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-18 - Stage-19 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-18 STAGE PLANS: Stage: Stage-4 @@ -4261,7 +4099,7 @@ STAGE PLANS: Reduce Output 
Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward @@ -4276,10 +4114,11 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -4402,36 +4241,13 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-19 + Stage: Stage-18 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/ptf.q.out b/ql/src/test/results/clientpositive/ptf.q.out index e61703c..9d34e4e 100644 --- a/ql/src/test/results/clientpositive/ptf.q.out +++ b/ql/src/test/results/clientpositive/ptf.q.out @@ -880,8 +880,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -938,7 +937,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator @@ -946,28 +945,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index cddf923..9f3813c 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -2353,10 +2353,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2370,35 +2368,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2410,16 +2382,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + 
File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2434,6 +2403,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2480,10 +2462,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2497,35 +2477,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2537,16 +2491,13 @@ STAGE 
PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2561,6 +2512,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2607,10 +2571,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2624,35 +2586,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2664,16 +2600,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2688,6 +2621,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2734,10 +2680,8 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2751,35 +2695,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 
12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2791,16 +2709,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2815,6 +2730,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2869,10 +2797,8 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2916,35 +2842,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2956,16 +2856,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2980,6 +2877,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3107,10 +3017,8 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3154,35 +3062,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 
36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3194,16 +3076,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3218,6 +3097,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3345,10 +3237,8 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3392,35 +3282,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) 
Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3432,16 +3296,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3456,6 +3317,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3583,10 +3457,8 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### 
Vertices: Map 1 @@ -3630,35 +3502,9 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3670,16 +3516,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3694,6 +3537,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out b/ql/src/test/results/clientpositive/spark/ptf.q.out index 
647b83e..6beeaf4 100644 --- a/ql/src/test/results/clientpositive/spark/ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/ptf.q.out @@ -868,8 +868,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -915,7 +914,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -924,18 +923,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index b5a2a75..7678309 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -420,9 +420,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 6 (GROUP, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -446,7 +445,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -487,7 +486,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -496,17 +495,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -538,7 +526,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -554,7 +542,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 6960bee..32514ca 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -1855,8 +1855,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1956,7 +1955,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false @@ -1968,22 +1967,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: int) - auto parallelism: false - Execution mode: vectorized - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 1c49f52..07c0764 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -4854,7 +4854,7 @@ PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) f PREHOOK: type: QUERY POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY -Plan optimized by CBO. +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -4864,29 +4864,29 @@ Stage-0 limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_7] + File Output Operator [FS_6] compressed:false Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_5] + Group By Operator [GBY_4] | aggregations:["count(DISTINCT KEY._col1:0._col0)","count(DISTINCT KEY._col1:1._col0)","sum(VALUE._col2)"] | keys:KEY._col0 (type: int) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] - Reduce Output Operator [RS_4] + Reduce Output Operator [RS_3] key expressions:_col0 (type: int), _col1 (type: int), _col2 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+++ Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE value expressions:_col5 (type: bigint) - Group By Operator [GBY_3] - aggregations:["count(DISTINCT _col1)","count(DISTINCT _col2)","sum(_col3)"] - keys:_col0 (type: int), _col1 (type: int), _col2 (type: int) + Group By Operator [GBY_2] + aggregations:["count(DISTINCT b)","count(DISTINCT c)","sum(d)"] + keys:a (type: int), b (type: int), c (type: int) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_1] - outputColumnNames:["_col0","_col1","_col2","_col3"] + outputColumnNames:["a","b","c","d"] Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE TableScan [TS_0] alias:abcd @@ -4896,7 +4896,7 @@ PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) f PREHOOK: type: QUERY POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a POSTHOOK: type: QUERY -Plan optimized by CBO. +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -4906,24 +4906,24 @@ Stage-0 limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_6] + File Output Operator [FS_5] compressed:false Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_4] + Group By Operator [GBY_3] | aggregations:["count(DISTINCT KEY._col1:0._col0)","count(DISTINCT KEY._col1:1._col0)","sum(VALUE._col0)"] | keys:KEY._col0 (type: int) | outputColumnNames:["_col0","_col1","_col2","_col3"] | Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] - Reduce Output Operator [RS_3] - key expressions:_col0 (type: int), _col1 (type: int), _col2 (type: int) - Map-reduce partition columns:_col0 (type: int) + Reduce Output Operator [RS_2] + key expressions:a (type: int), b (type: int), c (type: int) + Map-reduce partition columns:a (type: int) sort order:+++ Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE - value expressions:_col3 (type: int) + value expressions:d (type: int) Select Operator [SEL_1] - outputColumnNames:["_col0","_col1","_col2","_col3"] + outputColumnNames:["a","b","c","d"] Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE TableScan [TS_0] alias:abcd @@ -6317,7 +6317,7 @@ PREHOOK: query: explain select name,id FROM sales LEFT SEMI JOIN things ON (sale PREHOOK: type: QUERY POSTHOOK: query: explain select name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) POSTHOOK: type: QUERY -Plan optimized by CBO. +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) @@ -6327,48 +6327,45 @@ Stage-0 limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_12] + File Output Operator [FS_10] compressed:true Statistics:Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Merge Join Operator [MERGEJOIN_17] + Merge Join Operator [MERGEJOIN_15] | condition map:[{"":"Left Semi Join 0 to 1"}] - | keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"} + | keys:{"1":"_col0 (type: int)","0":"id (type: int)"} | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | key expressions:_col1 (type: int) - | Map-reduce partition columns:_col1 (type: int) + | Reduce Output Operator [RS_5] + | key expressions:id (type: int) + | Map-reduce partition columns:id (type: int) | sort order:+ | Statistics:Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1"] + | value expressions:name (type: string) + | Filter Operator [FIL_13] + | predicate:id is not null (type: boolean) | Statistics:Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_15] - | predicate:id is not null (type: boolean) + | TableScan [TS_0] + | alias:sales | Statistics:Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_0] - | alias:sales - | Statistics:Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_9] + Reduce Output Operator [RS_7] key expressions:_col0 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_5] - keys:_col0 (type: int) + Group By Operator [GBY_3] + keys:id (type: int) outputColumnNames:["_col0"] Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_3] - outputColumnNames:["_col0"] + Select Operator [SEL_2] + outputColumnNames:["id"] Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_16] + Filter Operator [FIL_14] predicate:id is not null (type: boolean) Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_2] + TableScan [TS_1] alias:things Statistics:Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -6931,13 +6928,12 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 + Reducer 4 File Output Operator [FS_14] compressed:true Statistics:Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE @@ -6948,52 +6944,42 @@ Stage-0 PTF Operator [PTF_11] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}] Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_10] + Group By Operator [GBY_8] + | keys:KEY._col0 (type: 
string), KEY._col1 (type: string), KEY._col2 (type: int) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_9] - key expressions:_col0 (type: string), _col1 (type: string) + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int) Map-reduce partition columns:_col0 (type: string) - sort order:++ + sort order:+++ Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col2 (type: int) - Group By Operator [GBY_8] + Group By Operator [GBY_6] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_7] - key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: int) + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int) + Map-reduce partition columns:rand() (type: double) sort order:+++ - Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_6] - | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_5] - key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int) - Map-reduce partition columns:rand() (type: double) - sort order:+++ - Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator [PTF_3] - Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] - Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_2] - | outputColumnNames:["_col1","_col2","_col5"] - | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - Reduce Output Operator [RS_1] - key expressions:p_mfgr (type: string), p_name (type: string) - Map-reduce partition columns:p_mfgr (type: string) - sort order:++ - Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:p_size (type: int) - TableScan [TS_0] - alias:part - Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: 
string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select abc.* diff --git a/ql/src/test/results/clientpositive/tez/ptf.q.out b/ql/src/test/results/clientpositive/tez/ptf.q.out index 88d1a98..b134440 100644 --- a/ql/src/test/results/clientpositive/tez/ptf.q.out +++ b/ql/src/test/results/clientpositive/tez/ptf.q.out @@ -869,7 +869,6 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -915,7 +914,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -924,18 +923,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out index c2e9b1a..2dad1e7 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out @@ -1857,7 +1857,6 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1957,7 +1956,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: true @@ -1969,22 +1968,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: int) - auto parallelism: true - Execution mode: vectorized - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: 
string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out index a38548f..05363e4 100644 --- a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -447,10 +447,9 @@ WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -507,7 +506,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TableScan Union @@ -520,7 +519,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator @@ -528,27 +527,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -581,7 +559,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/vectorized_ptf.q.out index 79edb0e..e65a880 100644 --- a/ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -2001,8 +2001,7 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2125,7 +2124,7 @@ STAGE PLANS: Reduce Output Operator key 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false @@ -2161,68 +2160,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition