diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index 2bd40fa6782804804a3b02b312b9e2bfc3cd9e61..4d9d1da77394125cfcb9ac9ccf1c00528664b981 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -143,4 +143,8 @@ private static void iterateParents(Operator<?> operator, Function<Operator<?>> fn,
       }
     }
   }
+
+  public static boolean sameRowSchema(Operator<?> operator1, Operator<?> operator2) {
+    return operator1.getSchema().equals(operator2.getSchema());
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
index 71cc7eb409adc86e5f729b9b8fd510f95aabb7d7..450d7f364d1d83facf58e85a9bce87029b53d0d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java
@@ -20,6 +20,7 @@
 
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Iterator;
 
 /**
  * RowSchema Implementation.
@@ -49,6 +50,51 @@ public void setSignature(ArrayList<ColumnInfo> signature) {
   }
 
   @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof RowSchema)) {
+      return false;
+    }
+
+    RowSchema dest = (RowSchema) obj;
+    if (this.signature == null && dest.getSignature() == null) {
+      return true;
+    }
+    if ((this.signature == null && dest.getSignature() != null) ||
+        (this.signature != null && dest.getSignature() == null)) {
+      return false;
+    }
+
+    if (this.signature.size() != dest.getSignature().size()) {
+      return false;
+    }
+
+    Iterator<ColumnInfo> origIt = this.signature.iterator();
+    Iterator<ColumnInfo> destIt = dest.getSignature().iterator();
+    while (origIt.hasNext()) {
+      ColumnInfo origColumn = origIt.next();
+      ColumnInfo destColumn = destIt.next();
+
+      if (origColumn == null && destColumn == null) {
+        continue;
+      }
+
+      if ((origColumn == null && destColumn != null) ||
+          (origColumn != null && destColumn == null)) {
+        return false;
+      }
+
+      if (!origColumn.equals(destColumn)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append('(');
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 42b546bdb2bf21119e52e6a08480f13d5f2fc344..95d2d76c80aa59b62e9464f704523d921302d401 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -125,4 +125,31 @@ public boolean supportUnionRemoveOptimization() {
   public boolean acceptLimitPushdown() {
     return true;
   }
+
+  /**
+   * Checks whether this select operator passes its input tuples through
+   * unchanged, i.e. whether it is an identity projection.
+   *
+   * @return true if this is an identity select operator, false otherwise
+   */
+  public boolean isIdentitySelect() {
+    // Safety check
+    if (this.getNumParent() != 1) {
+      return false;
+    }
+
+    // Select *
+    if (this.getConf().isSelStarNoCompute() ||
+        this.getConf().isSelectStar()) {
+      return true;
+    }
+
+    // Check whether this select and its parent have the same row schema
+    if (!OperatorUtils.sameRowSchema(this, this.getParentOperators().get(0))) {
+      return false;
+    }
+
+    return true;
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 355bc1f814af3bf419e8a4b11a03de72c98c4bbd..5c165a296ef46ac0d0943386563bb4feb226450f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -93,6 +93,7 @@
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -9053,38 +9054,65 @@ private Operator genUnionPlan(String unionalias, String leftalias,
       rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias,
           unionoutRR, unionalias);
     }
 
-    // If one of the children is a union, merge with it
+    // If one of the children (left or right) is:
+    // (i) a union, or
+    // (ii) an identity projection followed by a union,
+    // merge with it
     // else create a new one
-    if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) {
-      if (leftOp instanceof UnionOperator) {
-        // make left a child of right
-        List<Operator<? extends OperatorDesc>> child =
-            new ArrayList<Operator<? extends OperatorDesc>>();
-        child.add(leftOp);
-        rightOp.setChildOperators(child);
-
-        List<Operator<? extends OperatorDesc>> parent = leftOp
-            .getParentOperators();
-        parent.add(rightOp);
-
-        UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
-        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
-        return putOpInsertMap(leftOp, unionoutRR);
-      } else {
-        // make right a child of left
-        List<Operator<? extends OperatorDesc>> child =
-            new ArrayList<Operator<? extends OperatorDesc>>();
-        child.add(rightOp);
-        leftOp.setChildOperators(child);
-
-        List<Operator<? extends OperatorDesc>> parent = rightOp
-            .getParentOperators();
-        parent.add(leftOp);
-        UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
-        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
-
-        return putOpInsertMap(rightOp, unionoutRR);
-      }
+    if (leftOp instanceof UnionOperator ||
+        (leftOp instanceof SelectOperator &&
+        leftOp.getParentOperators() != null &&
+        !leftOp.getParentOperators().isEmpty() &&
+        leftOp.getParentOperators().get(0) instanceof UnionOperator &&
+        ((SelectOperator) leftOp).isIdentitySelect())) {
+
+      if (!(leftOp instanceof UnionOperator)) {
+        Operator oldChild = leftOp;
+        leftOp = (Operator) leftOp.getParentOperators().get(0);
+        leftOp.removeChildAndAdoptItsChildren(oldChild);
+      }
+
+      // make left a child of right
+      List<Operator<? extends OperatorDesc>> child =
+          new ArrayList<Operator<? extends OperatorDesc>>();
+      child.add(leftOp);
+      rightOp.setChildOperators(child);
+
+      List<Operator<? extends OperatorDesc>> parent = leftOp
+          .getParentOperators();
+      parent.add(rightOp);
+
+      UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
+      uDesc.setNumInputs(uDesc.getNumInputs() + 1);
+      return putOpInsertMap(leftOp, unionoutRR);
+    }
+
+    if (rightOp instanceof UnionOperator ||
+        (rightOp instanceof SelectOperator &&
+        rightOp.getParentOperators() != null &&
+        !rightOp.getParentOperators().isEmpty() &&
+        rightOp.getParentOperators().get(0) instanceof UnionOperator &&
((SelectOperator)rightOp).isIdentitySelect()) ) { + + if(!(rightOp instanceof UnionOperator)) { + Operator oldChild = rightOp; + rightOp = (Operator) rightOp.getParentOperators().get(0); + rightOp.removeChildAndAdoptItsChildren(oldChild); + } + + // make right a child of left + List> child = + new ArrayList>(); + child.add(rightOp); + leftOp.setChildOperators(child); + + List> parent = rightOp + .getParentOperators(); + parent.add(leftOp); + UnionDesc uDesc = ((UnionOperator) rightOp).getConf(); + uDesc.setNumInputs(uDesc.getNumInputs() + 1); + + return putOpInsertMap(rightOp, unionoutRR); } // Create a new union operator diff --git a/ql/src/test/results/clientpositive/union29.q.out b/ql/src/test/results/clientpositive/union29.q.out index 41dde4e1ce5d0549a5501a35d45c06a506e695db..3a60d0a036690824bb5bf1e8cd22750dc79dae39 100644 --- a/ql/src/test/results/clientpositive/union29.q.out +++ b/ql/src/test/results/clientpositive/union29.q.out @@ -75,25 +75,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -102,25 +96,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), 
_col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Stage: Stage-7 Conditional Operator diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out index b5065368d3f27a6036d681413f5b3d3bba0af062..59934c65b4d7019afdb10d449b8e30e9ef332a50 100644 --- a/ql/src/test/results/clientpositive/union30.q.out +++ b/ql/src/test/results/clientpositive/union30.q.out @@ -48,17 +48,16 @@ select key, value from src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-11 + Stage-2 depends on stages: Stage-1, Stage-10 Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 - Stage-7 - Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 - Stage-5 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-8 - Stage-9 depends on stages: Stage-8 - Stage-11 is a root stage + Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 + Stage-4 depends on stages: Stage-0 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 + Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -135,42 +134,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE 
Select Operator @@ -206,11 +169,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union - Stage: Stage-10 + Stage: Stage-9 Conditional Operator - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -226,10 +210,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,7 +225,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -253,13 +237,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-9 + Stage: Stage-8 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/union34.q.out b/ql/src/test/results/clientpositive/union34.q.out index dc46ac7b6c62f1b97ed8f8e176e0797ca507e72e..0ab57d6a27cd5934f80f53036ba0f03f508aa07f 100644 --- a/ql/src/test/results/clientpositive/union34.q.out +++ b/ql/src/test/results/clientpositive/union34.q.out @@ -121,22 +121,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) 
+ value expressions: _col1 (type: string) TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -145,22 +139,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -375,22 +363,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -399,22 +381,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Union + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
_col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/union_remove_10.q.out b/ql/src/test/results/clientpositive/union_remove_10.q.out index b078793cd216fd3a8aee059f7070706a42b25b5f..344ed9c1ef60e7f0f043838c91b6bfc8c201e2d9 100644 --- a/ql/src/test/results/clientpositive/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/union_remove_10.q.out @@ -78,13 +78,13 @@ select * FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-2, Stage-8 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-6 depends on stages: Stage-1, Stage-7, Stage-8 , consists of Stage-3, Stage-2, Stage-4 Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 + Stage-2 + Stage-4 + Stage-5 depends on stages: Stage-4 + Stage-7 is a root stage Stage-8 is a root stage STAGE PLANS: @@ -123,55 +123,17 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - TableScan - alias: inputtbl1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Union - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - table: - input 
format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-4 + Stage: Stage-3 Move Operator files: hdfs directory: true @@ -187,27 +149,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-2 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-5 + Stage: Stage-4 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-6 + Stage: Stage-5 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -226,6 +188,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), UDFToLong(2) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + PREHOOK: query: insert overwrite table outputTbl1 SELECT * FROM ( @@ -255,7 +236,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ] PREHOOK: query: desc formatted outputTbl1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@outputtbl1 diff --git a/ql/src/test/results/clientpositive/union_remove_11.q.out b/ql/src/test/results/clientpositive/union_remove_11.q.out index 82d91a9d036b5c1a61debfe9258b84e8ae2b8f8e..6db85b61d0631702607ca7879d84cabc4f6715b8 100644 --- a/ql/src/test/results/clientpositive/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/union_remove_11.q.out @@ -97,25 +97,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union - Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE - Union + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL 
Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 TableScan alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE @@ -145,25 +139,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union - Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE - Union + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-6 Conditional Operator