commit 5871687dd57436dc50962caf5873cc9d1841f420
Author: Andrew Sherman
Date:   Tue Jan 30 17:23:36 2018 -0800

    HIVE-17935: set the default for hive.optimize.sort.dynamic.partition to true

diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b7d3e99e1a505f576a06c530080fc72dddcd85ba..6d9614b907fb6e76bb8e46fe9950c1ca5083cb12 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1653,7 +1653,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "Currently it only works with Apache Tez. This should always be set to true. \n" +
         "Since it is a new feature, it has been made configurable."),
-    HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false,
+    HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true,
         "When enabled dynamic partitioning column will be globally sorted.\n" +
         "This way we can keep only one record writer open for each partition value\n" +
         "in the reducer thereby reducing the memory pressure on reducers."),
diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java
index 62ee66f7172397a52caa09443d7fb0504f7cba0d..4662074049632c1d60444f414b15e10ccf6effdd 100644
--- itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java
+++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/AbstractJdbcTriggersTest.java
@@ -74,6 +74,7 @@ public static void beforeTest() throws Exception {
     conf.setBoolVar(ConfVars.TEZ_EXEC_SUMMARY, true);
     conf.setBoolVar(ConfVars.HIVE_STRICT_CHECKS_CARTESIAN, false);
     conf.setVar(ConfVars.LLAP_IO_MEMORY_MODE, "none");
+    conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false);
     conf.addResource(new URL("file://" + new File(confDir).toURI().getPath() + "/tez-site.xml"));
diff --git ql/src/test/results/clientpositive/perf/spark/query83.q.out ql/src/test/results/clientpositive/perf/spark/query83.q.out
index dc04bca3023ac909c314c28acde6e0ac58e1e758..710f4f90375821c3bfdc5998a39903111c0a15cc 100644
--- ql/src/test/results/clientpositive/perf/spark/query83.q.out
+++ ql/src/test/results/clientpositive/perf/spark/query83.q.out
@@ -138,26 +138,26 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2)
-        Reducer 12 <- Reducer 11 (GROUP, 2)
+        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1)
+        Reducer 12 <- Reducer 11 (GROUP, 1)
         Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 41), Map 18 (PARTITION-LEVEL SORT, 41)
         Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 40), Reducer 20 (PARTITION-LEVEL SORT, 40)
         Reducer 17 <- Reducer 16 (GROUP, 43)
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 30), Map 7 (PARTITION-LEVEL SORT, 30)
-        Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2)
-        Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 24 (PARTITION-LEVEL SORT, 2)
-        Reducer 23 <- Reducer 22 (GROUP, 2)
+        Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 1), Reducer 23 (PARTITION-LEVEL SORT, 1)
+        Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 1), Map 24 (PARTITION-LEVEL SORT, 1)
+        Reducer 23 <- Reducer 22 (GROUP, 1)
         Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 16), Map 29 (PARTITION-LEVEL SORT, 16)
         Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 13), Reducer 31 (PARTITION-LEVEL SORT, 13)
         Reducer 28 <- Reducer 27 (GROUP,
13) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 27), Reducer 9 (PARTITION-LEVEL SORT, 27) - Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Reducer 34 (PARTITION-LEVEL SORT, 2) - Reducer 33 <- Map 32 (PARTITION-LEVEL SORT, 2), Map 35 (PARTITION-LEVEL SORT, 2) - Reducer 34 <- Reducer 33 (GROUP, 2) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 1), Reducer 34 (PARTITION-LEVEL SORT, 1) + Reducer 33 <- Map 32 (PARTITION-LEVEL SORT, 1), Map 35 (PARTITION-LEVEL SORT, 1) + Reducer 34 <- Reducer 33 (GROUP, 1) Reducer 4 <- Reducer 3 (GROUP, 29) Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 42), Reducer 28 (PARTITION-LEVEL SORT, 42), Reducer 4 (PARTITION-LEVEL SORT, 42) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 1), Reducer 12 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out index 9c62fb344bc7778565cfb5f665e83e076d94f135..c3bfa2de01a099d351c9136bba793f1a5ff69f86 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out @@ -61,9 +61,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -75,14 +78,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part1 + value expressions: _col0 (type: string), _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -90,14 +96,42 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + 
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part1 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part2 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out index ad4668e14950ac980e7559e1219d4f2b7ddaae6f..304a786811cba75343a5f33d88de649f13efb759 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -60,14 +62,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part10 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part10 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index 
08d63506c01d7994e3d0e62b86a996c14e2ff3f1..94b990c22c76f26514e764053dd902dee64b7811 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -52,8 +52,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,15 +80,28 @@ STAGE PLANS: expressions: 'k1' (type: string), null (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - Reducer 4 + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 + Reducer 5 Reduce Operator Tree: Limit Number of rows: 2 @@ -96,15 +110,13 @@ STAGE PLANS: expressions: 'k2' (type: string), '' (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 - Reducer 6 + value expressions: _col0 (type: string) + Reducer 7 Reduce Operator Tree: Limit Number of rows: 2 @@ -113,14 +125,12 @@ STAGE PLANS: expressions: 'k3' (type: string), ' ' (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + 
value expressions: _col0 (type: string) Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out index ca6729aba8d4133c31bd140c53e15fd81f5e1bd7..69c520677e2d6ccbbc182bd1b7d6e1b330218309 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out @@ -47,6 +47,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -58,14 +60,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part3 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part3 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out index 368c08a501dc662844bf3ff9e8c12c7a6280d2a0..e7a2f4fe0c45d445144ce27a74b439d844085974 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out @@ -57,6 +57,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -68,14 +70,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part4 + value 
expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part4 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out index 4fc0d8c31ac78b855379ff9ce1ea0e9c31006f5c..25679366d8861b2b52feb8e464c343a39c2f5a63 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -45,14 +47,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part5 + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part5 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out index fea2d518acf00bcebd0e4871b3755aba33241158..ddb8c41041d2807f28803d8abcc4224cf7c2a97b 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out @@ -53,9 +53,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -69,35 +72,221 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 
Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was 
here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -106,36 +295,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-12-31/ + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.nzhang_part8 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct nzhang_part8 { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part8 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -336,6 +504,81 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + Dp Sort State: PARTITION_SORTED + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-12-31/ + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.nzhang_part8 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct nzhang_part8 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part8 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out index 4bb08a05d90f447c4362610e9014c385703e5949..3d7f16506b9c63149960cb19dc3ccf0b17de68d7 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -60,14 +62,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part9 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part9 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/orc_merge2.q.out ql/src/test/results/clientpositive/spark/orc_merge2.q.out index 2bc5ff181f6535c68c9037797953402e22a6df28..acb302003abd9749666c949f389340ec074587c1 100644 --- ql/src/test/results/clientpositive/spark/orc_merge2.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge2.q.out @@ -37,6 +37,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -48,14 +50,27 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge2a + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a Stage: Stage-7 Conditional Operator diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_4.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_4.q.out index e7a789ad65d7d835108be2d1a5698c10d2661cd7..dfd2bb9d4cbe99c66b123834c0d2e5b661cd3ab6 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_4.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_4.q.out @@ -146,8 +146,8 @@ STAGE PLANS: Stage: Stage-1 
Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -376,8 +376,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -606,8 +606,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -795,8 +795,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -984,8 +984,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -1168,8 +1168,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) - Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) + Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -1447,8 +1447,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) - Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) + Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -1763,9 +1763,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) Reducer 6 <- Map 1 (SORT, 1) - Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 4), Reducer 6 (PARTITION-LEVEL SORT, 4) + Reducer 7 <- Map 8 (PARTITION-LEVEL SORT, 6), Reducer 6 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -2028,9 
+2028,9 @@ STAGE PLANS: Spark Edges: Reducer 10 <- Map 9 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Reducer 4 (PARTITION-LEVEL SORT, 6) Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 10 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 6), Reducer 10 (PARTITION-LEVEL SORT, 6), Reducer 4 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_5.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_5.q.out index 189a43bd159a1376a0f7c8b76153e6c34609b154..1113584568655d40a06020a010f7d1ffd3adca40 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_5.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_5.q.out @@ -90,8 +90,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 4 (PARTITION-LEVEL SORT, 6) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 6), Reducer 2 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -236,7 +236,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 4 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_mapjoin_only.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_mapjoin_only.q.out index 0f7ad17f75b2826dea05c084a61005035fdfdfc2..3bebb72234cba8166589390d2cbc3309e6ad7c91 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_mapjoin_only.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_mapjoin_only.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -259,7 +259,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -397,7 +397,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 3 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/spark_explain_groupbyshuffle.q.out ql/src/test/results/clientpositive/spark/spark_explain_groupbyshuffle.q.out index 8f69f8c7b8d076f690946d71c8431aeb56a35da1..fe4a8d6eda39453b8b0147df35d3efd4efe6cda5 100644 --- ql/src/test/results/clientpositive/spark/spark_explain_groupbyshuffle.q.out +++ ql/src/test/results/clientpositive/spark/spark_explain_groupbyshuffle.q.out @@ -10,7 +10,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 4) + Reducer 2 <- Map 1 (GROUP, 6) 
#### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +68,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 diff --git ql/src/test/results/clientpositive/spark/stats2.q.out ql/src/test/results/clientpositive/spark/stats2.q.out index e8330dbf2b64ecedd6e98b54a2d79c8062aa4f4a..1348062bf1f3e3d11a87c4e1b97d539f1d57ee2d 100644 --- ql/src/test/results/clientpositive/spark/stats2.q.out +++ ql/src/test/results/clientpositive/spark/stats2.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -30,14 +32,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.analyze_t1 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.analyze_t1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/union.q.out ql/src/test/results/clientpositive/spark/union.q.out index fb1ad65e2f9ec91186a5d37dd5c9e01612b0d1df..b7ec6ac49b9ea7562f4aa265a78e727714b1a5d1 100644 --- ql/src/test/results/clientpositive/spark/union.q.out +++ ql/src/test/results/clientpositive/spark/union.q.out @@ -86,6 +86,90 @@ INSERT OVERWRITE DIRECTORY 'target/warehouse/union.out' SELECT unioninput.* POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: target/warehouse/union.out +86val_86 +27val_27 +98val_98 +66val_66 +37val_37 +15val_15 +82val_82 +17val_17 +0val_0 +57val_57 +20val_20 +92val_92 +47val_47 +72val_72 +4val_4 +35val_35 +54val_54 +51val_51 +65val_65 +83val_83 +12val_12 +67val_67 +84val_84 +58val_58 +8val_8 +24val_24 +42val_42 +0val_0 +96val_96 +26val_26 +51val_51 +43val_43 +95val_95 +98val_98 +85val_85 +77val_77 +0val_0 +87val_87 +15val_15 +72val_72 +90val_90 +19val_19 +10val_10 +5val_5 +58val_58 +35val_35 +95val_95 +11val_11 +34val_34 +42val_42 +78val_78 +76val_76 +41val_41 +30val_30 +64val_64 +76val_76 +74val_74 +69val_69 +33val_33 +70val_70 +5val_5 +2val_2 +35val_35 +80val_80 +44val_44 +53val_53 +90val_90 +12val_12 
+5val_5 +70val_70 +24val_24 +70val_70 +83val_83 +26val_26 +67val_67 +18val_18 +9val_9 +18val_18 +97val_97 +84val_84 +28val_28 +37val_37 +90val_90 +97val_97 238val_238 311val_311 165val_165 @@ -500,87 +584,3 @@ POSTHOOK: Output: target/warehouse/union.out 403val_403 400val_400 200val_200 -86val_86 -27val_27 -98val_98 -66val_66 -37val_37 -15val_15 -82val_82 -17val_17 -0val_0 -57val_57 -20val_20 -92val_92 -47val_47 -72val_72 -4val_4 -35val_35 -54val_54 -51val_51 -65val_65 -83val_83 -12val_12 -67val_67 -84val_84 -58val_58 -8val_8 -24val_24 -42val_42 -0val_0 -96val_96 -26val_26 -51val_51 -43val_43 -95val_95 -98val_98 -85val_85 -77val_77 -0val_0 -87val_87 -15val_15 -72val_72 -90val_90 -19val_19 -10val_10 -5val_5 -58val_58 -35val_35 -95val_95 -11val_11 -34val_34 -42val_42 -78val_78 -76val_76 -41val_41 -30val_30 -64val_64 -76val_76 -74val_74 -69val_69 -33val_33 -70val_70 -5val_5 -2val_2 -35val_35 -80val_80 -44val_44 -53val_53 -90val_90 -12val_12 -5val_5 -70val_70 -24val_24 -70val_70 -83val_83 -26val_26 -67val_67 -18val_18 -9val_9 -18val_18 -97val_97 -84val_84 -28val_28 -37val_37 -90val_90 -97val_97 diff --git ql/src/test/results/clientpositive/spark/union14.q.out ql/src/test/results/clientpositive/spark/union14.q.out index d3ede704284a65321e013699f5f7742edfce556f..acdecbf175f2688cdbd424473b753842f113a89c 100644 --- ql/src/test/results/clientpositive/spark/union14.q.out +++ ql/src/test/results/clientpositive/spark/union14.q.out @@ -122,20 +122,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### +278 1 273 1 -224 1 +128 1 255 1 - 10 +tst1 1 +146 1 369 1 213 1 -tst1 1 -66 1 -406 1 -150 1 +224 1 +311 1 238 1 +150 1 + 10 +406 1 +66 1 401 1 98 1 -146 1 -311 1 -128 1 -278 1 diff --git ql/src/test/results/clientpositive/spark/union15.q.out ql/src/test/results/clientpositive/spark/union15.q.out index 77ed5434a881348b8d139eaf43adf7720191d27b..3fe3091e0ec997b22f6d983d29f3b3ad292b3f19 100644 --- ql/src/test/results/clientpositive/spark/union15.q.out +++ ql/src/test/results/clientpositive/spark/union15.q.out @@ -147,20 +147,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### +278 2 273 2 -224 2 +128 2 255 2 - 20 +tst1 1 +146 2 369 2 213 2 -tst1 1 -66 2 -406 2 -150 2 +224 2 +311 2 238 2 +150 2 + 20 +406 2 +66 2 401 2 98 2 -146 2 -311 2 -128 2 -278 2 diff --git ql/src/test/results/clientpositive/spark/union7.q.out ql/src/test/results/clientpositive/spark/union7.q.out index 9f99c180064fc02cc4409097714277c0d6aa9542..621c54e7ae4d77da85b5512584e297477fc17f08 100644 --- ql/src/test/results/clientpositive/spark/union7.q.out +++ ql/src/test/results/clientpositive/spark/union7.q.out @@ -118,20 +118,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### +278 1 273 1 -224 1 +128 1 255 1 - 10 +tst1 1 +146 1 369 1 213 1 -tst1 1 -66 1 -406 1 -150 1 +224 1 +311 1 238 1 +150 1 + 10 +406 1 +66 1 401 1 98 1 -146 1 -311 1 -128 1 -278 1 diff --git ql/src/test/results/clientpositive/spark/union_null.q.out ql/src/test/results/clientpositive/spark/union_null.q.out index 00bd9d93a18846e6cbaf778a4acbc72dbb28c289..d37adbb8d898c469125b36a99cef6e325dda34f3 100644 --- ql/src/test/results/clientpositive/spark/union_null.q.out +++ ql/src/test/results/clientpositive/spark/union_null.q.out @@ -6,16 +6,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src 
#### A masked pattern was here #### -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 +val_0 +val_0 +val_10 +val_100 PREHOOK: query: select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL val_0 val_0 val_0 val_10 val_100 +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -82,5 +82,5 @@ POSTHOOK: query: select null as c1 UNION ALL select 1 as c1 POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -NULL 1 +NULL diff --git ql/src/test/results/clientpositive/spark/union_remove_15.q.out ql/src/test/results/clientpositive/spark/union_remove_15.q.out index 58b968840c961ad6d329dadab07020b6795f5b26..dfd5b32f9ca18fb9253bc6aae3643075379f770f 100644 --- ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -49,7 +49,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 4 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -85,15 +86,28 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -105,14 +119,12 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + 
key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Move Operator @@ -172,11 +184,11 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 4 + numFiles 2 numPartitions 2 numRows 0 rawDataSize 0 - totalSize 332 + totalSize 178 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/union_remove_16.q.out ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 94ee97e67f4ff6f8a2291de989be6b7a9c5fccd2..d9e95fe6f6e205a283f5e25a4c763cd25d5d3f4f 100644 --- ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -54,7 +54,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 4 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -90,15 +91,28 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -110,14 +124,12 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: 
diff --git ql/src/test/results/clientpositive/spark/union_remove_16.q.out ql/src/test/results/clientpositive/spark/union_remove_16.q.out
index 94ee97e67f4ff6f8a2291de989be6b7a9c5fccd2..d9e95fe6f6e205a283f5e25a4c763cd25d5d3f4f 100644
--- ql/src/test/results/clientpositive/spark/union_remove_16.q.out
+++ ql/src/test/results/clientpositive/spark/union_remove_16.q.out
@@ -54,7 +54,8 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 4 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -90,15 +91,28 @@ STAGE PLANS:
                 expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                      name: default.outputtbl1
-        Reducer 4
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
+        Reducer 3
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
+        Reducer 5
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -110,14 +124,12 @@ STAGE PLANS:
                 expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                      name: default.outputtbl1
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
 
   Stage: Stage-6
     Conditional Operator
@@ -214,11 +226,11 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type:           MANAGED_TABLE
 Table Parameters:
-  numFiles              4
+  numFiles              2
   numPartitions         2
   numRows               0
   rawDataSize           0
-  totalSize             332
+  totalSize             178
 #### A masked pattern was here ####
 
 # Storage Information
diff --git ql/src/test/results/clientpositive/spark/union_remove_17.q.out ql/src/test/results/clientpositive/spark/union_remove_17.q.out
index 7715683ff4f464f0b6cfce43ab52eeb919088211..c2a1308a4a0ec002e8685eea5572cade736b3561 100644
--- ql/src/test/results/clientpositive/spark/union_remove_17.q.out
+++ ql/src/test/results/clientpositive/spark/union_remove_17.q.out
@@ -47,6 +47,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -62,15 +64,13 @@ STAGE PLANS:
                 expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                      name: default.outputtbl1
-        Map 2
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
+        Map 3
             Map Operator Tree:
                 TableScan
                   alias: inputtbl1
@@ -83,14 +83,27 @@ STAGE PLANS:
                 expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                      name: default.outputtbl1
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
 
   Stage: Stage-0
     Move Operator
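union_remove_16 and union_remove_17 above follow the same pattern: the map- or reduce-side File Output Operators are replaced by a shuffle on _col2 feeding one reducer that owns the sorted write. Workloads or tests that depend on the old plan shape or file layout can opt out per session without touching the cluster default:

    -- session-level opt-out, restoring the pre-HIVE-17935 plan shape
    set hive.optimize.sort.dynamic.partition=false;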
diff --git ql/src/test/results/clientpositive/spark/union_remove_18.q.out ql/src/test/results/clientpositive/spark/union_remove_18.q.out
index 1c570c9e4b6d203a8b64f645a74ab62ef8966e24..2472dfeb5d44a3805cc5180c86c02bafe17d80a4 100644
--- ql/src/test/results/clientpositive/spark/union_remove_18.q.out
+++ ql/src/test/results/clientpositive/spark/union_remove_18.q.out
@@ -49,7 +49,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 4 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -85,34 +85,27 @@ STAGE PLANS:
                 expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.outputtbl1
-        Reducer 4
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
+        Reducer 3
            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.outputtbl1
+                Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.outputtbl1
 
   Stage: Stage-0
     Move Operator
@@ -184,7 +177,7 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type:           MANAGED_TABLE
 Table Parameters:
-  numFiles              12
+  numFiles              6
   numPartitions         6
   numRows               0
   rawDataSize           0
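In union_remove_18 the partition value comes from the data rather than from a constant, so one statement fills all six partitions, and numFiles halves from 12 to 6 because each partition is now written by exactly one PARTITION_SORTED writer. A sketch of that shape, with illustrative names rather than the verbatim test:

    -- ds comes from the source rows, so the shuffle key is a real data column
    explain
    insert overwrite table outputtbl1 partition (ds)
    select key, count(1) as cnt, ds from inputtbl1 group by key, ds;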
diff --git ql/src/test/results/clientpositive/spark/union_remove_25.q.out ql/src/test/results/clientpositive/spark/union_remove_25.q.out
index 6363ee671ad624662fc42bbfb5cb1ee6c3d9148e..2f29ed14d8d4d2dec91e93a2a687f03e5f5fe7a7 100644
--- ql/src/test/results/clientpositive/spark/union_remove_25.q.out
+++ ql/src/test/results/clientpositive/spark/union_remove_25.q.out
@@ -404,7 +404,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -437,35 +437,27 @@ STAGE PLANS:
                 expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Reduce Output Operator
+                  key expressions: _col2 (type: string), _col3 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.outputtbl3
-        Reducer 4
+                  value expressions: _col0 (type: string), _col1 (type: bigint)
+        Reducer 3
            Reduce Operator Tree:
              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 1000
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl3
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.outputtbl3
 
   Stage: Stage-0
     Move Operator
@@ -538,7 +530,7 @@ Database: default
 Table:                outputtbl3
 #### A masked pattern was here ####
 Partition Parameters:
-  numFiles              2
+  numFiles              1
   totalSize             6812
 #### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/spark/union_view.q.out ql/src/test/results/clientpositive/spark/union_view.q.out
index 1b73ddb824c2aaca85498351fe26f31b0efa223c..9a5f900d8bc291d8256b8049f84aae8246476c7e 100644
--- ql/src/test/results/clientpositive/spark/union_view.q.out
+++ ql/src/test/results/clientpositive/spark/union_view.q.out
@@ -492,11 +492,11 @@ STAGE PLANS:
 86  val_86  2
 86  val_86  3
 86  val_86  3
+86  val_86  2
+86  val_86  2
 86  val_86  3
 86  val_86  3
 86  val_86  1
-86  val_86  2
-86  val_86  2
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1