diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
index c973d98..fec6822 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
@@ -96,7 +96,8 @@ public ParseContext transform(ParseContext pCtx) throws SemanticException {
     // Walk the tree again to see if the union can be removed completely
     HiveConf conf = pCtx.getConf();
     opRules.clear();
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)) {
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE) &&
+        !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {

       if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES)) {
         throw new
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index dbf4fb2..3cdfc51 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -216,9 +216,6 @@ public void removeUnionOperators(Configuration conf, GenSparkProcContext context
       if (current instanceof FileSinkOperator) {
         FileSinkOperator fileSink = (FileSinkOperator)current;

-        // remember it for additional processing later
-        context.fileSinkSet.add(fileSink);
-
         FileSinkDesc desc = fileSink.getConf();
         Path path = desc.getDirName();
         List<FileSinkDesc> linked;
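The UnionProcessor hunk above gates the union-remove optimization on the execution engine: when Hive runs on Spark, UNION ALL is kept and planned as a UnionWork vertex (the "Union N" vertices visible in the golden-file diffs below), so the rewrite must be skipped. A minimal sketch of the guard, using only the HiveConf accessors that appear in the hunk; the helper name is illustrative, not part of the patch:

    // Union-remove applies only when the user enabled it and the engine is not Spark;
    // on Spark the union is compiled into a UnionWork vertex instead of being removed.
    private static boolean shouldRemoveUnion(HiveConf conf) {
      boolean enabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE);
      String engine = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
      return enabled && !"spark".equals(engine);
    }

The GenSparkUtils hunk stops adding each FileSinkOperator encountered during union removal to context.fileSinkSet. Judging by the golden-file diffs below, those sinks were already registered once elsewhere, so the extra add yielded one redundant Stats-Aggr stage (and one repeated "[Error 30017]" line) per union branch; dropping it removes the duplicates.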
diff --git a/ql/src/test/queries/clientpositive/union_remove_1.q b/ql/src/test/queries/clientpositive/union_remove_1.q
index c87b3fe..0db1743 100644
--- a/ql/src/test/queries/clientpositive/union_remove_1.q
+++ b/ql/src/test/queries/clientpositive/union_remove_1.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_10.q b/ql/src/test/queries/clientpositive/union_remove_10.q
index 6701952..1e9c201 100644
--- a/ql/src/test/queries/clientpositive/union_remove_10.q
+++ b/ql/src/test/queries/clientpositive/union_remove_10.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_11.q b/ql/src/test/queries/clientpositive/union_remove_11.q
index 4b2fa42..7052c69 100644
--- a/ql/src/test/queries/clientpositive/union_remove_11.q
+++ b/ql/src/test/queries/clientpositive/union_remove_11.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q
index 69d0d0a..67a1829 100644
--- a/ql/src/test/queries/clientpositive/union_remove_12.q
+++ b/ql/src/test/queries/clientpositive/union_remove_12.q
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
 set hive.auto.convert.join=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q
index 7605f0e..29c164a 100644
--- a/ql/src/test/queries/clientpositive/union_remove_13.q
+++ b/ql/src/test/queries/clientpositive/union_remove_13.q
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
 set hive.auto.convert.join=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q
index a4fdfc8..ca2f5e5 100644
--- a/ql/src/test/queries/clientpositive/union_remove_14.q
+++ b/ql/src/test/queries/clientpositive/union_remove_14.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.auto.convert.join=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_15.q b/ql/src/test/queries/clientpositive/union_remove_15.q
index e3c937b..72ced75 100644
--- a/ql/src/test/queries/clientpositive/union_remove_15.q
+++ b/ql/src/test/queries/clientpositive/union_remove_15.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
diff --git a/ql/src/test/queries/clientpositive/union_remove_16.q b/ql/src/test/queries/clientpositive/union_remove_16.q
index 537078b..72e6cb1 100644
--- a/ql/src/test/queries/clientpositive/union_remove_16.q
+++ b/ql/src/test/queries/clientpositive/union_remove_16.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_17.q b/ql/src/test/queries/clientpositive/union_remove_17.q
index d70f3d3..fa68755 100644
--- a/ql/src/test/queries/clientpositive/union_remove_17.q
+++ b/ql/src/test/queries/clientpositive/union_remove_17.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
diff --git a/ql/src/test/queries/clientpositive/union_remove_18.q b/ql/src/test/queries/clientpositive/union_remove_18.q
index 6352bc3..6d2d331 100644
--- a/ql/src/test/queries/clientpositive/union_remove_18.q
+++ b/ql/src/test/queries/clientpositive/union_remove_18.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
diff --git a/ql/src/test/queries/clientpositive/union_remove_19.q b/ql/src/test/queries/clientpositive/union_remove_19.q
index 8d75afa..17b8a0f 100644
--- a/ql/src/test/queries/clientpositive/union_remove_19.q
+++ b/ql/src/test/queries/clientpositive/union_remove_19.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_2.q b/ql/src/test/queries/clientpositive/union_remove_2.q
index 83cd288..0142325 100644
--- a/ql/src/test/queries/clientpositive/union_remove_2.q
+++ b/ql/src/test/queries/clientpositive/union_remove_2.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_20.q b/ql/src/test/queries/clientpositive/union_remove_20.q
index f80f7c1..1c59ef2 100644
--- a/ql/src/test/queries/clientpositive/union_remove_20.q
+++ b/ql/src/test/queries/clientpositive/union_remove_20.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_21.q b/ql/src/test/queries/clientpositive/union_remove_21.q
index 8963c25..cbaa08b 100644
--- a/ql/src/test/queries/clientpositive/union_remove_21.q
+++ b/ql/src/test/queries/clientpositive/union_remove_21.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_22.q b/ql/src/test/queries/clientpositive/union_remove_22.q
index b0c1ccd..982912b 100644
--- a/ql/src/test/queries/clientpositive/union_remove_22.q
+++ b/ql/src/test/queries/clientpositive/union_remove_22.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_23.q b/ql/src/test/queries/clientpositive/union_remove_23.q
index a1b989a..63e4418 100644
--- a/ql/src/test/queries/clientpositive/union_remove_23.q
+++ b/ql/src/test/queries/clientpositive/union_remove_23.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_24.q b/ql/src/test/queries/clientpositive/union_remove_24.q
index ec561e0..88c378d 100644
--- a/ql/src/test/queries/clientpositive/union_remove_24.q
+++ b/ql/src/test/queries/clientpositive/union_remove_24.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_25.q b/ql/src/test/queries/clientpositive/union_remove_25.q
index 76c1ff5..c6c09e1 100644
--- a/ql/src/test/queries/clientpositive/union_remove_25.q
+++ b/ql/src/test/queries/clientpositive/union_remove_25.q
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_3.q b/ql/src/test/queries/clientpositive/union_remove_3.q
index 9617f73..7e1b113 100644
--- a/ql/src/test/queries/clientpositive/union_remove_3.q
+++ b/ql/src/test/queries/clientpositive/union_remove_3.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_4.q b/ql/src/test/queries/clientpositive/union_remove_4.q
index cae323b..44b31d6 100644
--- a/ql/src/test/queries/clientpositive/union_remove_4.q
+++ b/ql/src/test/queries/clientpositive/union_remove_4.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_5.q b/ql/src/test/queries/clientpositive/union_remove_5.q
index 5df84e1..c5c0b7f 100644
--- a/ql/src/test/queries/clientpositive/union_remove_5.q
+++ b/ql/src/test/queries/clientpositive/union_remove_5.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
diff --git a/ql/src/test/queries/clientpositive/union_remove_6.q b/ql/src/test/queries/clientpositive/union_remove_6.q
index bfce26d..6990ed2 100644
--- a/ql/src/test/queries/clientpositive/union_remove_6.q
+++ b/ql/src/test/queries/clientpositive/union_remove_6.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_7.q b/ql/src/test/queries/clientpositive/union_remove_7.q
index 3a95674..c254aba 100644
--- a/ql/src/test/queries/clientpositive/union_remove_7.q
+++ b/ql/src/test/queries/clientpositive/union_remove_7.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_8.q b/ql/src/test/queries/clientpositive/union_remove_8.q
index a83a43e..8dfb8e8 100644
--- a/ql/src/test/queries/clientpositive/union_remove_8.q
+++ b/ql/src/test/queries/clientpositive/union_remove_8.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
diff --git a/ql/src/test/queries/clientpositive/union_remove_9.q b/ql/src/test/queries/clientpositive/union_remove_9.q
index e71f6dd..c9a4dc3 100644
--- a/ql/src/test/queries/clientpositive/union_remove_9.q
+++ b/ql/src/test/queries/clientpositive/union_remove_9.q
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;
 set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
 set hive.merge.mapfiles=true;
 set hive.merge.mapredfiles=true;
 set hive.merge.smallfiles.avgsize=1;
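Every union_remove_N.q change above is the same one-line addition: hive.merge.sparkfiles is pinned to the value the test already uses for hive.merge.mapfiles and hive.merge.mapredfiles, so a Spark run exercises the same merge versus no-merge behavior as MapReduce. For reference, the merge-enabled preamble now reads as follows (settings copied from union_remove_10.q; the comment lines are editorial):

    set hive.stats.autogather=false;
    set hive.optimize.union.remove=true;
    set hive.mapred.supports.subdirectories=true;
    -- the three hive.merge.* switches cover Spark tasks, map-only jobs, and map-reduce jobs
    set hive.merge.sparkfiles=true;
    set hive.merge.mapfiles=true;
    set hive.merge.mapredfiles=true;
    -- threshold controlling when the conditional merge stage actually runs
    set hive.merge.smallfiles.avgsize=1;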
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
index 3fe3bf8..83e6d31 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
@@ -61,8 +61,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -131,12 +129,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr)
 select * from (
   select key, value, '22'
@@ -150,8 +142,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@nzhang_part13@ds=2010-03-03
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr)
 select * from (
   select key, value, '22'
diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
index bbb594e..42381ae 100644
--- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
@@ -941,8 +941,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-2000
 0
+2000
 PREHOOK: query: explain extended
 select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
@@ -1656,12 +1656,12 @@ STAGE PLANS:
             TableScan
               alias: src
               GatherStats: false
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: _col0
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: false (type: boolean)
+              Filter Operator
+                isSamplingPred: false
+                predicate: false (type: boolean)
+                Select Operator
+                  expressions: key (type: string)
+                  outputColumnNames: _col0
                   Select Operator
                     expressions: _col0 (type: string)
                     outputColumnNames: _col0
@@ -1806,12 +1806,12 @@
                 0 {KEY.reducesinkkey0}
                 1 
               outputColumnNames: _col0
-              Select Operator
-                expressions: _col0 (type: string)
-                outputColumnNames: _col0
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: false (type: boolean)
+              Filter Operator
+                isSamplingPred: false
+                predicate: false (type: boolean)
+                Select Operator
+                  expressions: _col0 (type: string)
+                  outputColumnNames: _col0
                   Select Operator
                     expressions: _col0 (type: string)
                     outputColumnNames: _col0
diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out
index 20c681e..2b67840 100644
--- a/ql/src/test/results/clientpositive/spark/union10.q.out
+++ b/ql/src/test/results/clientpositive/spark/union10.q.out
@@ -31,9 +31,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0
-  Stage-6 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -169,15 +166,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                         UNION  ALL
@@ -188,9 +176,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@tmptable
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                         UNION  ALL
diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out
index 3f37a0a..658efb7 100644
--- a/ql/src/test/results/clientpositive/spark/union18.q.out
+++ b/ql/src/test/results/clientpositive/spark/union18.q.out
@@ -37,12 +37,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
-  Stage-6 depends on stages: Stage-0
-  Stage-8 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
-  Stage-7 depends on stages: Stage-1
-  Stage-9 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-2
@@ -143,12 +139,6 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

-  Stage: Stage-6
-    Stats-Aggr Operator
-
-  Stage: Stage-8
-    Stats-Aggr Operator
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -162,12 +152,6 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator

-  Stage: Stage-7
-    Stats-Aggr Operator
-
-  Stage: Stage-9
-    Stats-Aggr Operator
-
 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
     select s2.key as key, s2.value as value from src s2) unionsrc
@@ -179,10 +163,6 @@ PREHOOK: Output: default@dest1
 PREHOOK: Output: default@dest2
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
     select s2.key as key, s2.value as value from src s2) unionsrc
diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out
index 6922fcd..91dc048 100644
--- a/ql/src/test/results/clientpositive/spark/union19.q.out
+++ b/ql/src/test/results/clientpositive/spark/union19.q.out
@@ -39,8 +39,6 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
-  Stage-6 depends on stages: Stage-1
-  Stage-7 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-2
@@ -181,12 +179,6 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator

-  Stage: Stage-6
-    Stats-Aggr Operator
-
-  Stage: Stage-7
-    Stats-Aggr Operator
-
 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
     select s2.key as key, s2.value as value from src s2) unionsrc
@@ -198,8 +190,6 @@ PREHOOK: Output: default@dest1
 PREHOOK: Output: default@dest2
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                          UNION  ALL
     select s2.key as key, s2.value as value from src s2) unionsrc
diff --git a/ql/src/test/results/clientpositive/spark/union28.q.out b/ql/src/test/results/clientpositive/spark/union28.q.out
index 8bd5218..c55aafb 100644
--- a/ql/src/test/results/clientpositive/spark/union28.q.out
+++ b/ql/src/test/results/clientpositive/spark/union28.q.out
@@ -37,9 +37,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0
-  Stage-6 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -174,15 +171,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (
   select key, value from src
@@ -198,9 +186,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@union_subq_union
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table union_subq_union
 select * from (
   select key, value from src
diff --git a/ql/src/test/results/clientpositive/spark/union29.q.out b/ql/src/test/results/clientpositive/spark/union29.q.out
index b9546ef..93c94d5 100644
--- a/ql/src/test/results/clientpositive/spark/union29.q.out
+++ b/ql/src/test/results/clientpositive/spark/union29.q.out
@@ -37,9 +37,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0
-  Stage-6 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -124,15 +121,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (
   select key, value from src
@@ -148,9 +136,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@union_subq_union
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table union_subq_union
 select * from (
   select key, value from src
diff --git a/ql/src/test/results/clientpositive/spark/union3.q.out b/ql/src/test/results/clientpositive/spark/union3.q.out
index 3ae6536..d238a71 100644
--- a/ql/src/test/results/clientpositive/spark/union3.q.out
+++ b/ql/src/test/results/clientpositive/spark/union3.q.out
@@ -239,10 +239,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@union_out
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table union_out
 SELECT *
 FROM (
diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out
index 12717a1..b1014b6 100644
--- a/ql/src/test/results/clientpositive/spark/union30.q.out
+++ b/ql/src/test/results/clientpositive/spark/union30.q.out
@@ -51,10 +51,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0
-  Stage-6 depends on stages: Stage-0
-  Stage-7 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -215,18 +211,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
-  Stage: Stage-6
-    Stats-Aggr Operator
-
-  Stage: Stage-7
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (

@@ -249,10 +233,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@union_subq_union
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table union_subq_union
 select * from (

diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out
index b89757f..7eadb04 100644
--- a/ql/src/test/results/clientpositive/spark/union33.q.out
+++ b/ql/src/test/results/clientpositive/spark/union33.q.out
@@ -37,8 +37,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -142,12 +140,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
 PREHOOK: query: INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
   SELECT key, value FROM src
@@ -160,8 +152,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@test_src
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
   SELECT key, value FROM src
@@ -207,8 +197,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -312,12 +300,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
 PREHOOK: query: INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
   SELECT key, COUNT(*) AS value FROM src
@@ -330,8 +312,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@test_src
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: INSERT OVERWRITE TABLE test_src
 SELECT key, value FROM (
   SELECT key, COUNT(*) AS value FROM src
diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out
index 6341cd9..fd2abc8 100644
--- a/ql/src/test/results/clientpositive/spark/union4.q.out
+++ b/ql/src/test/results/clientpositive/spark/union4.q.out
@@ -29,8 +29,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -130,12 +128,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                         UNION  ALL
@@ -144,8 +136,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@tmptable
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                         UNION  ALL
diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out
index 263d9f4..5bbb681 100644
--- a/ql/src/test/results/clientpositive/spark/union6.q.out
+++ b/ql/src/test/results/clientpositive/spark/union6.q.out
@@ -29,8 +29,6 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-0

 STAGE PLANS:
   Stage: Stage-1
@@ -111,12 +109,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

-  Stage: Stage-4
-    Stats-Aggr Operator
-
-  Stage: Stage-5
-    Stats-Aggr Operator
-
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                         UNION  ALL
@@ -126,8 +118,6 @@ PREHOOK: Input: default@src
 PREHOOK: Input: default@src1
 PREHOOK: Output: default@tmptable
 [Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
-[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
 POSTHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                         UNION  ALL
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
index c07fbb1..8fe85fb 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
@@ -70,7 +70,8 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP)
-        Reducer 4 <- Map 3 (GROUP)
+        Reducer 5 <- Map 4 (GROUP)
+        Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -94,7 +95,7 @@
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                       value expressions: _col1 (type: bigint)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: inputtbl1
@@ -122,39 +123,41 @@
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.outputtbl1
-        Reducer 4 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+        Reducer 5 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+        Union 3 
+            Vertex: Union 3

   Stage: Stage-2
     Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
index 927a15d..9f3fe48 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
@@ -78,15 +78,20 @@ select * FROM (
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-0 depends on stages: Stage-2
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5

 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP)
-        Union 3 <- Map 5 (NONE), Reducer 2 (NONE)
+        Union 3 <- Map 4 (NONE), Map 5 (NONE), Reducer 2 (NONE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -114,26 +119,9 @@
            Map Operator Tree:
                TableScan
                  alias: inputtbl1
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                  Select Operator
                    expressions: key (type: string), UDFToLong(1) (type: bigint)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                          name: default.outputtbl1
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: inputtbl1
-                  Select Operator
-                    expressions: key (type: string), UDFToLong(2) (type: bigint)
-                    outputColumnNames: _col0, _col1
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: bigint)
                      outputColumnNames: _col0, _col1
@@ -144,6 +132,26 @@
                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                        name: default.outputtbl1
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: inputtbl1
+                  Select Operator
+                    expressions: key (type: string), UDFToLong(2) (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: bigint)
+                        outputColumnNames: _col0, _col1
+                        File Output Operator
+                          compressed: false
+                          table:
+                              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                              name: default.outputtbl1
        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
@@ -157,16 +165,28 @@
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint)
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
        Union 3 
            Vertex: Union 3

+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
   Stage: Stage-2
     Dependency Collection
@@ -180,6 +200,22 @@
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.outputtbl1

+  Stage: Stage-3
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
 PREHOOK: query: insert overwrite table outputTbl1
 SELECT * FROM (
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
index 96651e1..a5b459c 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
@@ -78,8 +78,13 @@ select * FROM (
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-0 depends on stages: Stage-2
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5

 STAGE PLANS:
   Stage: Stage-1
@@ -148,6 +153,15 @@ STAGE PLANS:
        Union 2 
            Vertex: Union 2

+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
   Stage: Stage-2
     Dependency Collection
@@ -161,6 +175,22 @@ STAGE PLANS:
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.outputtbl1

+  Stage: Stage-3
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
 PREHOOK: query: insert overwrite table outputTbl1
 SELECT * FROM (
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
index 463ae63..6ec6e91 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
@@ -76,7 +76,9 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP)
-        Reducer 4 <- Map 3 (GROUP)
+        Reducer 4 <- Union 3 (GROUP SORT)
+        Reducer 6 <- Map 5 (GROUP)
+        Union 3 <- Reducer 2 (NONE), Reducer 6 (NONE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -100,7 +102,7 @@
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                       value expressions: _col1 (type: bigint)
-        Map 3 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: inputtbl1
@@ -128,39 +130,49 @@
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
        Reducer 4 
            Reduce Operator Tree:
+              Extract
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
+        Reducer 6 
+            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+        Union 3 
+            Vertex: Union 3

   Stage: Stage-2
     Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
index 0954ae4..9f3ae1f 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
@@ -66,15 +66,22 @@ FROM (
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-0 depends on stages: Stage-2
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5

 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP)
-        Reducer 4 <- Map 3 (GROUP)
+        Reducer 4 <- Union 3 (GROUP SORT)
+        Reducer 6 <- Map 5 (GROUP)
+        Union 3 <- Reducer 2 (NONE), Reducer 6 (NONE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -98,7 +105,7 @@
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
                       value expressions: _col1 (type: bigint)
-        Map 3 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: inputtbl1
@@ -126,39 +133,58 @@
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
        Reducer 4 
            Reduce Operator Tree:
+              Extract
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
+        Reducer 6 
+            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                        name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator
+                      key expressions: _col2 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col2 (type: string)
+                      value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+        Union 3 
+            Vertex: Union 3
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####

   Stage: Stage-2
     Dependency Collection
@@ -175,6 +201,22 @@
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.outputtbl1

+  Stage: Stage-3
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
+    Merge Work
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
 PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
 SELECT * FROM (
Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + Union 3 + Vertex: Union 3 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out index a486323..acc76f3 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -74,7 +74,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP) - Reducer 4 <- Map 3 (GROUP) + Reducer 5 <- Map 4 (GROUP) + Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE) #### A masked pattern was here #### Vertices: Map 1 @@ -98,7 +99,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: inputtbl1 @@ -126,39 +127,41 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - 
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
@@ -279,7 +282,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -306,7 +310,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -337,39 +341,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
@@ -452,7 +458,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -476,7 +483,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -504,53 +511,51 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Filter Operator
- predicate: (_col0 >= 7.0) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: double), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: (_col0 >= 7.0) (type: boolean)
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Filter Operator
- predicate: (_col0 >= 7.0) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: double), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: (_col0 >= 7.0) (type: boolean)
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
index 268a8d9..a55d00f 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
@@ -75,27 +75,28 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+ Union 2 <- Map 1 (NONE), Map 5 (NONE), Reducer 4 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(2) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Map 2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Map 3
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -116,43 +117,45 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(1) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 2
+ Vertex: Union 2
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
index 68de623..e240837 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
@@ -72,7 +72,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -96,7 +97,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -124,39 +125,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: _col1 (type: bigint), _col0 (type: string)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
- expressions: _col1 (type: bigint), _col0 (type: string)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
index 384f191..7265bc7 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
@@ -72,7 +72,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -96,7 +97,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -124,39 +125,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
index 0494612..c46971c 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
@@ -68,7 +68,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -92,7 +93,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -120,39 +121,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
index e93868e..0ecbe80 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
@@ -86,7 +86,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -110,7 +111,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -138,39 +139,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
@@ -298,7 +301,9 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 4 <- Union 3 (GROUP SORT)
+ Reducer 6 <- Map 5 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 6 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -317,7 +322,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Map 3
+ Map 5
Map Operator Tree:
TableScan
alias: srcpart
@@ -338,43 +343,45 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 500
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl2
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
Reducer 4
Reduce Operator Tree:
+ Extract
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+ Reducer 6
+ Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 500
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl2
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
@@ -449,7 +456,7 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE false
- numFiles 2
+ numFiles 1
numRows -1
rawDataSize -1
totalSize 6826
@@ -491,7 +498,9 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 4 <- Union 3 (GROUP SORT)
+ Reducer 6 <- Map 5 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 6 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -510,7 +519,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
- Map 3
+ Map 5
Map Operator Tree:
TableScan
alias: srcpart
@@ -531,43 +540,45 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl3
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col3 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
Reducer 4
Reduce Operator Tree:
+ Extract
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl3
+ Reducer 6
+ Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 1000
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl3
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col3 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
@@ -648,7 +659,7 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE false
- numFiles 2
+ numFiles 1
numRows -1
rawDataSize -1
totalSize 6812
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
index cc46dda..518dc24 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
@@ -62,15 +62,21 @@ FROM (
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
Stage-0 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -94,7 +100,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -122,39 +128,50 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-2
Dependency Collection
@@ -169,6 +186,42 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table outputTbl1
SELECT * FROM (
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
index f6cdeb3..f7f9627 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
@@ -70,34 +70,40 @@ FROM (
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
Stage-0 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+ Union 2 <- Map 1 (NONE), Map 5 (NONE), Reducer 4 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(2) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Map 2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Map 3
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -118,43 +124,54 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(1) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Reducer 3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-2
Dependency Collection
@@ -169,6 +186,42 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table outputTbl1
SELECT * FROM (
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
index b3ec85b..7f4b1c3 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
@@ -74,7 +74,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP)
- Reducer 4 <- Map 3 (GROUP)
+ Reducer 5 <- Map 4 (GROUP)
+ Union 3 <- Reducer 2 (NONE), Reducer 5 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -98,7 +99,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -126,39 +127,41 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
- Reducer 4
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Union 3
+ Vertex: Union 3
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
index d55b60d..3f7dbb9 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
@@ -79,27 +79,28 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+ Union 2 <- Map 1 (NONE), Map 5 (NONE), Reducer 4 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(2) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
- Map 2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Map 3
Map Operator Tree:
TableScan
alias: inputtbl1
@@ -120,43 +121,45 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: bigint)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), UDFToLong(1) (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
- Reducer 3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Reducer 4
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Union 2
+ Vertex: Union 2
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
index 1f0260c..973b1fd 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
@@ -74,15 +74,20 @@ select * FROM (
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-2 depends on stages: Stage-4, Stage-3, Stage-6
Stage-0 depends on stages: Stage-2
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
Spark
Edges:
Reducer 4 <- Map 3 (GROUP)
- Union 2 <- Map 1 (NONE), Map 5 (NONE)
+ Union 2 <- Map 1 (NONE), Map 5 (NONE), Reducer 4 (NONE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -95,13 +100,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
Map 3
Map Operator Tree:
TableScan
@@ -133,13 +141,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
Reducer 4
Reduce Operator Tree:
Group By Operator
@@ -147,22 +158,31 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
Union 2
Vertex: Union 2
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -176,6 +196,22 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.outputtbl1
+ Stage: Stage-3
+ Merge Work
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+ Stage: Stage-5
+ Merge Work
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table outputTbl1
SELECT * FROM (